def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # Load image.
    image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
    utils.check_image_size(dataset_dict, image)
    # Panoptic label is encoded in RGB image.
    pan_seg_gt = utils.read_image(dataset_dict.pop("pan_seg_file_name"), "RGB")

    # Reuses semantic transform for panoptic labels.
    aug_input = T.AugInput(image, sem_seg=pan_seg_gt)
    _ = self.augmentations(aug_input)
    image, pan_seg_gt = aug_input.image, aug_input.sem_seg

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

    # Generates training targets for Panoptic-DeepLab.
    targets = self.panoptic_target_generator(rgb2id(pan_seg_gt), dataset_dict["segments_info"])
    dataset_dict.update(targets)

    return dataset_dict
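# -- Usage sketch (not part of the snippet above): wiring a mapper like this into
# detectron2's training dataloader. The mapper class name and its constructor are
# assumptions; build_detection_train_loader(cfg, mapper=...) is real detectron2 API.
from detectron2.config import get_cfg
from detectron2.data import build_detection_train_loader

cfg = get_cfg()
cfg.DATASETS.TRAIN = ("my_dataset_train",)  # assumes a registered dataset
mapper = PanopticDeeplabDatasetMapper(cfg)  # hypothetical constructor
train_loader = build_detection_train_loader(cfg, mapper=mapper)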
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept (classification only)
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    image, transforms = T.apply_transform_gens(
        ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image)
    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
    return dataset_dict
def __call__(self, dataset_dict):
    dataset_dict = copy.deepcopy(dataset_dict)
    try:
        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
        utils.check_image_size(dataset_dict, image)
    except OSError:
        # Skip unreadable images by returning None (to be filtered out downstream).
        return

    if "annotations" not in dataset_dict:
        image, transforms = T.apply_transform_gens(
            ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image)
    else:
        # Crop around a randomly chosen instance before the other transforms.
        if self.crop_gen:
            crop_tfm = utils.gen_crop_transform_with_instance(
                self.crop_gen.get_crop_size(image.shape[:2]),
                image.shape[:2],
                np.random.choice(dataset_dict["annotations"]),
            )
            image = crop_tfm.apply_image(image)
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        if self.crop_gen:
            transforms = crop_tfm + transforms

    image_shape = image.shape[:2]  # h, w
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
    return dataset_dict
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    if "annotations" not in dataset_dict:
        image, transforms = T.apply_transform_gens(
            ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image)
    else:
        # Crop around an instance if there are instances in the image.
        # USER: Remove if you don't use cropping
        if self.crop_gen:
            crop_tfm = utils.gen_crop_transform_with_instance(
                self.crop_gen.get_crop_size(image.shape[:2]),
                image.shape[:2],
                np.random.choice(dataset_dict["annotations"]),
            )
            image = crop_tfm.apply_image(image)
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        if self.crop_gen:
            transforms = crop_tfm + transforms

    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

    if not self.is_train:
        # USER: Modify this if you want to keep them for some reason.
        dataset_dict.pop("annotations", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(obj, transforms, image_shape)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = annotations_to_instances(annos, image_shape)
        dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
def __call__(self, dataset_dict):
    """
    Transform the dataset_dict according to the configured transformations.

    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a new dict that's going to be processed by the model.
            It currently does the following:
            1. Read the image from "file_name"
            2. Transform the image and annotations
            3. Prepare the annotations to :class:`Instances`
    """
    # Get the 3D model for each annotation; keep the heavy mesh models out of
    # the deepcopy below by dropping them from the image dict first.
    mesh_models = []
    if "annotations" in dataset_dict:
        for anno in dataset_dict["annotations"]:
            mesh_models.append(
                [
                    self._all_mesh_models[anno["mesh"]][0].clone(),
                    self._all_mesh_models[anno["mesh"]][1].clone(),
                ]
            )

    dataset_dict = {key: value for key, value in dataset_dict.items() if key != "mesh_models"}
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below

    if "annotations" in dataset_dict:
        for i, anno in enumerate(dataset_dict["annotations"]):
            anno["mesh"] = mesh_models[i]

    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    image, transforms = T.apply_transform_gens(self.tfm_gens, image)
    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
    # Can use uint8 if it turns out to be slow some day

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        annos = [
            self.transform_annotations(obj, transforms, image_shape)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        # Should not be empty during training
        instances = annotations_to_instances(annos, image_shape)
        dataset_dict["instances"] = instances[instances.gt_boxes.nonempty()]

    return dataset_dict
def __call__(self, dataset_dict):
    self.tfm_gens = []
    dataset_dict = deepcopy(dataset_dict)
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    if self.is_train:
        # Crop
        if 'crop' in self.da.keys():
            crop_gen = T.RandomCrop(self.da['crop']['type'], self.da['crop']['size'])
            self.tfm_gens.append(crop_gen)
        # Horizontal flip
        if 'flip' in self.da.keys():
            flip_gen = T.RandomFlip(
                prob=self.da['flip']['prob'],
                horizontal=self.da['flip']['horizontal'],
                vertical=self.da['flip']['vertical'])
            self.tfm_gens.append(flip_gen)

    image, transforms = T.apply_transform_gens(self.tfm_gens, image)
    image_shape = image.shape[:2]  # h, w
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape,
                keypoint_hflip_indices=self.keypoint_hflip_indices)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.mask_format)
        if self.crop_gen and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
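# -- Illustrative shape of the `self.da` augmentation dict read by the mapper above.
# Only the keys and fields are implied by the code; the values here are assumptions.
da = {
    "crop": {"type": "relative_range", "size": [0.9, 0.9]},        # -> T.RandomCrop(type, size)
    "flip": {"prob": 0.5, "horizontal": True, "vertical": False},  # -> T.RandomFlip(...)
}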
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    image, transforms = T.apply_transform_gens(self.augmentation, image)
    image_shape = image.shape[:2]  # h, w
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        return dataset_dict

    for anno in dataset_dict["annotations"]:
        if not self.mask_on:
            anno.pop("segmentation", None)
        if not self.keypoint_on:
            anno.pop("keypoints", None)

    # USER: Implement additional transformations if you have other types of data
    # USER: Don't call transpose_densepose if you don't need
    annos = [
        self._transform_densepose(
            utils.transform_instance_annotations(
                obj, transforms, image_shape,
                keypoint_hflip_indices=self.keypoint_hflip_indices),
            transforms,
        )
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]

    if self.mask_on:
        self._add_densepose_masks_as_segmentation(annos, image_shape)

    instances = utils.annotations_to_instances(annos, image_shape, mask_format="bitmask")
    densepose_annotations = [obj.get("densepose") for obj in annos]
    if densepose_annotations and not all(v is None for v in densepose_annotations):
        instances.gt_densepose = DensePoseList(
            densepose_annotations, instances.gt_boxes, image_shape)

    dataset_dict["instances"] = instances[instances.gt_boxes.nonempty()]
    return dataset_dict
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    if self.crop_gen is None:
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
    else:
        if np.random.rand() > 0.5:
            image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        else:
            image, transforms = T.apply_transform_gens(
                self.tfm_gens[:-1] + self.crop_gen + self.tfm_gens[-1:], image)

    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

    if not self.is_train:
        # USER: Modify this if you want to keep them for some reason.
        dataset_dict.pop("annotations", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(obj, transforms, image_shape)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(annos, image_shape)
        dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
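# -- Illustrative composition behind the 50/50 branch above (DETR-style): the crop
# generators are spliced in just before the final transform (typically the resize).
# The concrete generators below are assumptions modeled on the DETR detectron2 mapper,
# not taken from this snippet.
tfm_gens = [
    T.RandomFlip(),
    T.ResizeShortestEdge([480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800],
                         max_size=1333, sample_style="choice"),
]
crop_gen = [
    T.ResizeShortestEdge([400, 500, 600], sample_style="choice"),
    T.RandomCrop("absolute_range", (384, 600)),
]
composed = tfm_gens[:-1] + crop_gen + tfm_gens[-1:]  # flip, pre-resize, crop, final resize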
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
    utils.check_image_size(dataset_dict, image)

    aug_input = T.AugInput(image)
    transforms = self.augmentations(aug_input)
    image = aug_input.image

    image_shape = image.shape[:2]  # h, w
    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # Maps points from the closed interval [0, image_size - 1] on discrete
        # image coordinates to the half-open interval [x1, x2) on continuous image
        # coordinates. We use the continuous-discrete conversion from Heckbert
        # 1990 ("What is the coordinate of a pixel?"): d = floor(c) and c = d + 0.5,
        # where d is a discrete coordinate and c is a continuous coordinate.
        for ann in dataset_dict["annotations"]:
            # `np.float` is deprecated; the builtin `float` is the equivalent dtype.
            point_coords_wrt_image = np.array(ann["point_coords"]).astype(float)
            point_coords_wrt_image = point_coords_wrt_image + 0.5
            ann["point_coords"] = point_coords_wrt_image

        annos = [
            # also need to transform point coordinates
            transform_instance_annotations(obj, transforms, image_shape)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = annotations_to_instances(
            annos, image_shape, sample_points=self.sample_points)
        dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
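# -- Tiny worked example of the discrete<->continuous pixel-coordinate convention
# cited above (Heckbert 1990): c = d + 0.5 and d = floor(c), so adding 0.5 moves
# an integer pixel index onto that pixel's center in continuous coordinates.
import math

d = 7            # discrete pixel index
c = d + 0.5      # continuous coordinate of the pixel's center -> 7.5
assert math.floor(c) == d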
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    # image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    rota = 0
    if self.rota_aug_on and dataset_dict["split"] != "val_mini" and dataset_dict["split"] != "test":
        rotated_aug = [0, 90, 180, 270]
        rota = random.choice(rotated_aug)
    image = read_image(dataset_dict["file_name"], format=self.img_format, rota=rota)
    utils.check_image_size(dataset_dict, image)

    image, transforms = T.apply_transform_gens(self.tfm_gens, image)
    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
    # Can use uint8 if it turns out to be slow some day

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Implement additional transformations if you have other types of data
        annos = [
            transform_dota_instance_annotations(obj, image_shape, rota, transforms)
            for obj in dataset_dict.pop("annotations")
        ]
        instances = dota_annotations_to_instances(annos, image_shape)
        dataset_dict["instances"] = filter_empty_instances(instances)
    return dataset_dict
def __call__(self, dataset_dict):
    self.tfm_gens = []
    dataset_dict = deepcopy(dataset_dict)
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    if self.is_train:
        # Crop
        # print("Augmentation: ", "T.RandomCrop('relative', [0.8, 0.4])")
        # crop_gen = T.RandomCrop('relative', [0.8, 0.4])
        # self.tfm_gens.append(crop_gen)

        # Horizontal flip
        print("Augmentation: ", "T.RandomFlip(prob=0.5, horizontal=True, vertical=False)")
        flip_gen = T.RandomFlip(prob=0.5, horizontal=True, vertical=False)
        self.tfm_gens.append(flip_gen)

    image, transforms = T.apply_transform_gens(self.tfm_gens, image)
    image_shape = image.shape[:2]  # h, w
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape,
                keypoint_hflip_indices=self.keypoint_hflip_indices)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.mask_format)
        if self.crop_gen and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    image, transforms = T.apply_transform_gens(self.augmentation, image)
    image_shape = image.shape[:2]  # h, w
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        return dataset_dict

    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
    ]
    dataset_dict["instances"] = utils.annotations_to_instances(annos, image.shape[:2])

    # # USER: Implement additional transformations if you have other types of data
    # # USER: Don't call transpose_densepose if you don't need
    # annos = [
    #     self._transform_densepose(
    #         utils.transform_instance_annotations(
    #             obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
    #         ),
    #         transforms,
    #     )
    #     for obj in dataset_dict.pop("annotations")
    #     if obj.get("iscrowd", 0) == 0
    # ]
    # instances = utils.annotations_to_instances(annos, image_shape, mask_format="bitmask")
    # dataset_dict["instances"] = instances[instances.gt_boxes.nonempty()]
    return dataset_dict
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
    utils.check_image_size(dataset_dict, image)

    aug_input = T.AugInput(image)
    transforms = self.augmentations(aug_input)
    image = aug_input.image

    image_shape = image.shape[:2]  # h, w
    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

    if "annotations" in dataset_dict:
        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape,
                keypoint_hflip_indices=self.keypoint_hflip_indices)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.instance_mask_format)

        # After transforms such as cropping are applied, the bounding box may no longer
        # tightly bound the object. As an example, imagine a triangle object
        # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
        # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
        # the intersection of original bounding box and the cropping box.
        dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
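# -- Numeric illustration of the comment above: after a crop, intersecting the
# original box with the crop window is looser than the tight box recomputed from
# the cropped geometry. For the triangle (0,0),(2,0),(0,2) cropped to x in [1,2],
# the hypotenuse x + y = 2 forces y <= 2 - x <= 1 inside the crop.
orig_box = (0, 0, 2, 2)   # XYXY box of the full triangle
crop_box = (1, 0, 2, 2)
box_intersection = (max(orig_box[0], crop_box[0]), max(orig_box[1], crop_box[1]),
                    min(orig_box[2], crop_box[2]), min(orig_box[3], crop_box[3]))
tight_box = (1, 0, 2, 1)  # recomputed from the cropped triangle itself
assert box_intersection == (1, 0, 2, 2) and box_intersection != tight_box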
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of ONE video, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    if self.is_train:
        # TODO: sample a fixed number of frames. Left unimplemented in the
        # original; as written, all frames are kept.
        pass

    new_dataset_dict = []
    for item in dataset_dict:
        image = utils.read_image(item["filename"], format=self.img_format)
        utils.check_image_size(item, image)

        # TODO: SSD random crop
        image, transforms = T.apply_transform_gens(
            self.tfm_gens[:-1] + self.crop_gen + self.tfm_gens[-1:], image)
        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        image = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
        sample = {"image": image}

        if not self.is_train:
            new_dataset_dict.append(sample)
            continue

        # USER: Implement additional transformations if you have other types of data
        boxes = [
            utils.transform_instance_annotations(box, transforms, image_shape)
            for box in item["boxes"]
        ]
        # TODO: build Instances from the transformed annotations. Left
        # unimplemented in the original; annotations_to_instances is an
        # assumed stand-in so the code runs.
        instances = utils.annotations_to_instances(boxes, image_shape)
        sample["instances"] = instances
        new_dataset_dict.append(sample)
    return new_dataset_dict
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)
    assert "sem_seg_file_name" in dataset_dict

    image, transforms = T.apply_transform_gens(self.tfm_gens, image)

    if self.is_train:
        with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f:
            sem_seg_gt = Image.open(f)
            sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
        sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
        if self.crop_gen:
            image, sem_seg_gt = crop_transform(
                image,
                sem_seg_gt,
                self.crop_gen,
                self.single_category_max_area,
                self.ignore_value,
            )
        dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

    if not self.is_train:
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict

    return dataset_dict
def _load_image_with_annos(self, dataset_dict):
    """
    Load the image and annotations given a dataset_dict.
    """
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
    utils.check_image_size(dataset_dict, image)

    aug_input = T.AugInput(image)
    transforms = self.augmentations(aug_input)
    image = aug_input.image

    image_shape = image.shape[:2]  # h, w

    if not self.is_train:
        # USER: Modify this if you want to keep them for some reason.
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        return image, None

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.use_instance_mask:
                anno.pop("segmentation", None)
            if not self.use_keypoint:
                anno.pop("keypoints", None)
        # USER: Implement additional transformations if you have other
        # types of data
        # apply meta_infos for mosaic transformation
        annos = [
            transform_instance_annotations(
                obj, transforms, image_shape, add_meta_infos=self.add_meta_infos)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
    else:
        annos = None

    return image, annos
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    # TODO: decide whether dataset_dict is needed
    dataset_dict = copy.deepcopy(dataset_dict)
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    # Crop one patch per annotation; each patch is classified independently.
    image_list = []
    for idx, ann in enumerate(dataset_dict['annotations']):
        # bbox = BoxMode.convert(ann['bbox'], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
        bbox = [int(x) for x in ann['bbox']]
        image_crop = image[bbox[1]:(bbox[1] + bbox[3]), bbox[0]:(bbox[0] + bbox[2])]
        # image_crop, _ = T.apply_transform_gens(self.tfm_gens, image_crop)
        # image_crop = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
        image_crop = TF.to_tensor(np.ascontiguousarray(image_crop))
        if ann['category_id'] not in [0]:  # TODO: explicit list!
            image_list.append({
                "image": image_crop,
                "height": bbox[3],
                "width": bbox[2],
                "file_name": dataset_dict["file_name"],
                "box_id": idx,
                "bbox": bbox,
                'category_id': ann['category_id'],
                # TODO: confidence
            })

    # dataset_dict["image_list"] = image_list
    new_dict = {"image_list": image_list}
    return new_dict  # dataset_dict
def __call__(self, dataset_dict):
    """
    :param dataset_dict:
    :return:
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    # first resize, then crop
    if self.is_train:
        image, transforms = T.apply_transform_gens(
            ([self.crop_gen] if self.crop_gen else []), image)

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
    # Can use uint8 if it turns out to be slow some day

    if not self.is_train:
        mask = np.load(dataset_dict["mask_file_name"])
        # mask = transforms.apply_segmentation(mask)
        mask = torch.as_tensor(mask.astype(np.float32)[None])  # (1, H, W)
        dataset_dict["mask"] = mask
        return dataset_dict

    # Option 1: randomly generate brush-stroke masks as in Yu et al. 2019
    if self.mask_type == "random_regular":
        raise NotImplementedError
    elif self.mask_type == "random_irregular":
        mask = self.generate_random_stroke_mask(image.shape[:2])
    else:
        raise ValueError(f"Unexpected mask type, got {self.mask_type}")

    mask = torch.as_tensor(mask.astype(np.float32)[None])  # (1, H, W)
    dataset_dict["mask"] = mask
    return dataset_dict
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)
    image = utils.read_image(dataset_dict['file_name'], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    if self.crop_gen is None:
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
    elif np.random.rand() > 0.5:
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
    else:
        image, transforms = T.apply_transform_gens(
            self.tfm_gens[:-1] + self.crop_gen + self.tfm_gens[-1:], image)

    image_shape = image.shape[:2]
    dataset_dict['image'] = paddle.to_tensor(
        np.ascontiguousarray(image.transpose(2, 0, 1)))

    if not self.is_train:
        dataset_dict.pop('annotations', None)
        return dataset_dict

    if 'annotations' in dataset_dict:
        for anno in dataset_dict['annotations']:
            if not self.mask_on:
                anno.pop('segmentation', None)
            anno.pop('keypoints', None)
        annos = [
            utils.transform_instance_annotations(obj, transforms, image_shape)
            for obj in dataset_dict.pop('annotations')
            if obj.get('iscrowd', 0) == 0
        ]
        instances = utils.annotations_to_instances(annos, image_shape)
        dataset_dict['instances'] = utils.filter_empty_instances(instances)
    return dataset_dict
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
    utils.check_image_size(dataset_dict, image)

    # USER: Remove if you don't do semantic/panoptic segmentation.
    if "sem_seg_file_name" in dataset_dict:
        if "category_colors" in dataset_dict:
            # Labels stored as RGB color codes; convert colors to category ids.
            sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "RGB")
            sem_seg_gt = rgb2mask(sem_seg_gt, dataset_dict["category_colors"])
        else:
            sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L")
            sem_seg_gt = sem_seg_gt.squeeze(2)
    else:
        sem_seg_gt = None

    aug_input = T.AugInput(image, sem_seg=sem_seg_gt)
    transforms = self.augmentations(aug_input)
    image, sem_seg_gt = aug_input.image, aug_input.sem_seg

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
    if sem_seg_gt is not None:
        dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt, dtype=torch.long)

    if not self.is_train:
        # USER: Modify this if you want to keep them for some reason.
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict

    return dataset_dict
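# -- Hedged sketch of the `rgb2mask` helper the mapper above calls; its real
# implementation isn't shown here. This assumes `category_colors` maps a category
# id to an (R, G, B) triple, which is one plausible layout, not a confirmed one.
import numpy as np

def rgb2mask(rgb, category_colors):
    """Map an HxWx3 RGB label image to an HxW array of category ids."""
    mask = np.zeros(rgb.shape[:2], dtype=np.uint8)
    for cat_id, color in category_colors.items():
        # mark every pixel whose color matches this category's color code
        mask[(rgb == np.asarray(color, dtype=rgb.dtype)).all(axis=-1)] = cat_id
    return mask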
def __call__(self, dataset_dict):
    dataset_dict = copy.deepcopy(dataset_dict)
    image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
    utils.check_image_size(dataset_dict, image)

    aug_input = T.AugInput(image, sem_seg=None)
    transforms = self.augmentations(aug_input)
    image = aug_input.image

    image_shape = image.shape[:2]
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        annos = [
            self.transform_instance_annotations_rotated(obj, transforms, image_shape)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances_rotated(annos, image_shape)
        if self.recompute_boxes:
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    try:
        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    except Exception as e:
        print(dataset_dict["file_name"])
        print(e)
        raise e
    try:
        utils.check_image_size(dataset_dict, image)
    except SizeMismatchError as e:
        expected_wh = (dataset_dict["width"], dataset_dict["height"])
        image_wh = (image.shape[1], image.shape[0])
        if (image_wh[1], image_wh[0]) == expected_wh:
            print("transposing image {}".format(dataset_dict["file_name"]))
            image = image.transpose(1, 0, 2)
        else:
            raise e

    if "annotations" not in dataset_dict or len(dataset_dict["annotations"]) == 0:
        image, transforms = T.apply_transform_gens(
            ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image)
    else:
        # Crop around an instance if there are instances in the image.
        # USER: Remove if you don't use cropping
        if self.crop_gen:
            crop_tfm = gen_crop_transform_with_instance(
                self.crop_gen.get_crop_size(image.shape[:2]),
                image.shape[:2],
                dataset_dict["annotations"],
                crop_box=self.crop_box,
            )
            image = crop_tfm.apply_image(image)
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        if self.crop_gen:
            transforms = crop_tfm + transforms

    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
    # Can use uint8 if it turns out to be slow some day

    # USER: Remove if you don't use pre-computed proposals.
    if self.load_proposals:
        utils.transform_proposals(dataset_dict, image_shape, transforms,
                                  self.min_box_side_len, self.proposal_topk)

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        dataset_dict.pop("pano_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        annos = [
            transform_instance_annotations(
                obj, transforms, image_shape,
                keypoint_hflip_indices=self.keypoint_hflip_indices)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = annotations_to_instances(annos, image_shape, mask_format=self.mask_format)
        # Create a tight bounding box from masks, useful when image is cropped
        if self.crop_gen and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    # USER: Remove if you don't do semantic/panoptic segmentation.
    if "sem_seg_file_name" in dataset_dict:
        with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f:
            sem_seg_gt = Image.open(f)
            sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
        sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
        sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
        dataset_dict["sem_seg"] = sem_seg_gt

    if self.basis_loss_on and self.is_train:
        # load basis supervisions
        if self.ann_set == "coco":
            basis_sem_path = dataset_dict["file_name"].replace(
                'train2017', 'thing_train2017').replace('image/train', 'thing_train')
        else:
            basis_sem_path = dataset_dict["file_name"].replace(
                'coco', 'lvis').replace('train2017', 'thing_train').replace('jpg', 'npz')
        basis_sem_path = basis_sem_path.replace('jpg', 'npz')
        basis_sem_gt = np.load(basis_sem_path)["mask"]
        basis_sem_gt = transforms.apply_segmentation(basis_sem_gt)
        basis_sem_gt = torch.as_tensor(basis_sem_gt.astype("long"))
        dataset_dict["basis_sem"] = basis_sem_gt
    return dataset_dict
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
    utils.check_image_size(dataset_dict, image)
    image = transform(image=image)["image"]
    ############################################################################
    '''
    image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
    h, w, _ = image.shape
    utils.check_image_size(dataset_dict, image)

    bboxes = [ann["bbox"] for ann in dataset_dict['annotations']]
    labels = [ann['category_id'] for ann in dataset_dict['annotations']]
    class_labels = [CLASSES[label] for label in labels]

    segmentations = [ann["segmentation"] for ann in dataset_dict['annotations']]
    # cprint("before :", segmentations)
    masks = convert_coco_poly_to_mask(segmentations, h, w)
    masks = [mask.numpy() for mask in masks]

    transformed = transform(image=image, bboxes=bboxes, class_labels=class_labels, masks=masks)
    image = transformed["image"]
    bboxes = transformed["bboxes"]
    class_labels = transformed["class_labels"]
    labels = [CLASSES.index(cl) for cl in class_labels]

    filtered_masks = []
    for mask in transformed["masks"]:
        # if len(np.unique(mask)) > 1:
        filtered_masks.append(mask)

    if len(bboxes) != len(filtered_masks):
        print(len(bboxes), len(filtered_masks), len(labels))
    # print(len(bboxes), len(masks), len(labels))

    seg_masks = [binary_mask_to_polygon(mask, tolerance=2) for mask in masks]

    for idx in range(len(labels)):
        dataset_dict['annotations'][idx]["bbox"] = bboxes[idx]
        dataset_dict['annotations'][idx]["labels"] = labels[idx]
        dataset_dict['annotations'][idx]["segmentation"] = seg_masks[idx]
    dataset_dict['annotations'] = dataset_dict['annotations'][:len(labels)]
    '''

    # USER: Remove if you don't do semantic/panoptic segmentation.
    if "sem_seg_file_name" in dataset_dict:
        sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
    else:
        sem_seg_gt = None

    aug_input = T.StandardAugInput(image, sem_seg=sem_seg_gt)
    transforms = aug_input.apply_augmentations(self.augmentations)
    image, sem_seg_gt = aug_input.image, aug_input.sem_seg

    image_shape = image.shape[:2]  # h, w
    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
    if sem_seg_gt is not None:
        dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

    # USER: Remove if you don't use pre-computed proposals.
    # Most users would not need this feature.
    if self.proposal_topk is not None:
        utils.transform_proposals(
            dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk
        )

    if not self.is_train:
        # USER: Modify this if you want to keep them for some reason.
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.use_instance_mask:
                anno.pop("segmentation", None)
            if not self.use_keypoint:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape,
                keypoint_hflip_indices=self.keypoint_hflip_indices
            )
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.instance_mask_format
        )

        # After transforms such as cropping are applied, the bounding box may no longer
        # tightly bound the object. As an example, imagine a triangle object
        # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
        # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
        # the intersection of original bounding box and the cropping box.
        if self.recompute_boxes:
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
    utils.check_image_size(dataset_dict, image)

    # USER: Remove if you don't do semantic/panoptic segmentation.
    if "sem_seg_file_name" in dataset_dict:
        sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
    else:
        sem_seg_gt = None

    aug_input = T.StandardAugInput(image, sem_seg=sem_seg_gt)
    transforms = aug_input.apply_augmentations(self.augmentations)
    image, sem_seg_gt = aug_input.image, aug_input.sem_seg

    image_shape = image.shape[:2]  # h, w
    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
    if sem_seg_gt is not None:
        dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

    # USER: Remove if you don't use pre-computed proposals.
    # Most users would not need this feature.
    if self.proposal_topk is not None:
        utils.transform_proposals(dataset_dict, image_shape, transforms,
                                  proposal_topk=self.proposal_topk)

    if not self.is_train:
        # USER: Modify this if you want to keep them for some reason.
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.use_instance_mask:
                anno.pop("segmentation", None)
            if not self.use_keypoint:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape,
                keypoint_hflip_indices=self.keypoint_hflip_indices)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.instance_mask_format)

        # @ Will Lee fine-grained standard categories: non-standard, basically standard, or standard
        standard_ids = [obj["standard_id"] for obj in annos]
        standard_ids = torch.tensor(standard_ids, dtype=torch.int64)
        instances.gt_standards = standard_ids

        # After transforms such as cropping are applied, the bounding box may no longer
        # tightly bound the object. As an example, imagine a triangle object
        # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
        # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
        # the intersection of original bounding box and the cropping box.
        if self.recompute_boxes:
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    ### my code ###
    ## Stash the segmentation annotations for now and add them back after augmentation.
    seg_bk = [dictwk["segmentation"] for dictwk in dataset_dict["annotations"]]
    for i in range(len(dataset_dict["annotations"])):
        dataset_dict["annotations"][i].pop("segmentation")
    image, dataset_dict = self.aug_handler(image=image, dataset_dict_detectron=dataset_dict)
    for i in range(len(dataset_dict["annotations"])):
        dataset_dict["annotations"][i]["segmentation"] = seg_bk[i]
    ### my code ###

    if "annotations" not in dataset_dict:
        image, transforms = T.apply_transform_gens(
            ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image)
    else:
        # Crop around an instance if there are instances in the image.
        # USER: Remove if you don't use cropping
        if self.crop_gen:
            crop_tfm = utils.gen_crop_transform_with_instance(
                self.crop_gen.get_crop_size(image.shape[:2]),
                image.shape[:2],
                np.random.choice(dataset_dict["annotations"]),
            )
            image = crop_tfm.apply_image(image)
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        if self.crop_gen:
            transforms = crop_tfm + transforms

    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

    # USER: Remove if you don't use pre-computed proposals.
    if self.load_proposals:
        utils.transform_proposals(dataset_dict, image_shape, transforms,
                                  self.min_box_side_len, self.proposal_topk)

    if not self.is_train:
        # USER: Modify this if you want to keep them for some reason.
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape,
                keypoint_hflip_indices=self.keypoint_hflip_indices)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.mask_format)
        # Create a tight bounding box from masks, useful when image is cropped
        if self.crop_gen and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    # USER: Remove if you don't do semantic/panoptic segmentation.
    if "sem_seg_file_name" in dataset_dict:
        with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f:
            sem_seg_gt = Image.open(f)
            sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
        sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
        sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
        dataset_dict["sem_seg"] = sem_seg_gt
    return dataset_dict
def __call__(self, dataset_dict):
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    utils.check_image_size(dataset_dict, image)

    data_transformations = []
    if self.is_train:
        # Crop
        if self.crop:
            crop_gen = T.RandomCrop(self.crop_type, self.crop_size)
            data_transformations.append(crop_gen)
            print('crop')
        # Horizontal flip
        if self.flip:
            flip_gen = T.RandomFlip()
            data_transformations.append(flip_gen)
        # if self.rotation:
        #     rotation_gen = T.RandomRotation([0, 90])
        #     data_transformations.append(rotation_gen)
        if self.saturation:
            saturation_gen = T.RandomSaturation(0.5, 1.5)
            data_transformations.append(saturation_gen)
        print(str(dataset_dict["file_name"]))

    image, transforms = T.apply_transform_gens(data_transformations, image)

    # Dump the augmented image to disk for visual inspection.
    print('\n\n -------------------PRINTING IMAGE---------------------- \n\n')
    img_name = dataset_dict["file_name"][-15:-4]
    img_name = '/home/grupo01/images_augmented/' + img_name + '_augmented.png'
    print(len(dataset_dict["file_name"]))
    print(img_name)
    cv2.imwrite(img_name, image)

    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
    image_shape = image.shape[:2]

    if not self.is_train:
        # USER: Modify this if you want to keep them for some reason.
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)
        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape, keypoint_hflip_indices=None)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.mask_format)
        # Create a tight bounding box from masks, useful when image is cropped
        if self.crop_gen and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    # cur.
    cur_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # pre.
    pre_dict = self._load_pre_data(
        cur_dict["video_id"], cur_dict["frame_id"],
        cur_dict["sensor_id"] if "sensor_id" in cur_dict else 1)

    # we use zip loading.
    # image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    image = zipimread(cur_dict["file_name"])
    pre_image = zipimread(pre_dict["file_name"])
    if self.zip_read:
        if self.img_format == "RGB":
            image = image[:, :, ::-1]
            pre_image = pre_image[:, :, ::-1]
        else:
            raise NotImplementedError
    utils.check_image_size(cur_dict, image)
    utils.check_image_size(pre_dict, pre_image)

    if self.crop_gen is None:
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
    else:
        if np.random.rand() > 0.5:
            image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        else:
            image, transforms = T.apply_transform_gens(
                self.tfm_gens[:-1] + self.crop_gen + self.tfm_gens[-1:], image)

    # apply the same transform to pre frame.
    pre_image = transforms.apply_image(pre_image)

    image_shape = image.shape[:2]  # h, w
    pre_image_shape = pre_image.shape[:2]
    assert image_shape == pre_image_shape

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    cur_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
    pre_dict["image"] = torch.as_tensor(np.ascontiguousarray(pre_image.transpose(2, 0, 1)))

    if not self.is_train:
        # USER: Modify this if you want to keep them for some reason.
        cur_dict.pop("annotations", None)
        pre_dict.pop("annotations", None)
        return cur_dict, pre_dict

    if "annotations" in cur_dict and "annotations" in pre_dict:
        # cur.
        annos = [
            transform_instance_annotations(obj, transforms, image_shape)
            for obj in cur_dict["annotations"]
            if obj.get("iscrowd", 0) == 0
        ]
        instances = annotations_to_instances(annos, image_shape)
        cur_dict["instances"] = filter_empty_instances(instances)
        # pre.
        pre_annos = [
            transform_instance_annotations(obj, transforms, pre_image_shape)
            for obj in pre_dict["annotations"]
            if obj.get("iscrowd", 0) == 0
        ]
        pre_instances = annotations_to_instances(pre_annos, pre_image_shape)
        pre_dict["instances"] = filter_empty_instances(pre_instances)

    return cur_dict, pre_dict
def read_data(self, dataset_dict): """load image and annos random shift & scale bbox; crop, rescale.""" cfg = self.cfg r_head_cfg = cfg.MODEL.CDPN.ROT_HEAD pnp_net_cfg = cfg.MODEL.CDPN.PNP_NET dataset_dict = copy.deepcopy( dataset_dict) # it will be modified by code below dataset_name = dataset_dict["dataset_name"] image = read_image_cv2(dataset_dict["file_name"], format=self.img_format) # should be consistent with the size in dataset_dict utils.check_image_size(dataset_dict, image) im_H_ori, im_W_ori = image.shape[:2] # currently only replace bg for train ############################### if self.split == "train": # some synthetic data already has bg, img_type should be real or something else but not syn img_type = dataset_dict.get("img_type", "real") if img_type == "syn": log_first_n(logging.WARNING, "replace bg", n=10) assert "segmentation" in dataset_dict["inst_infos"] mask = cocosegm2mask( dataset_dict["inst_infos"]["segmentation"], im_H_ori, im_W_ori) image, mask_trunc = self.replace_bg(image.copy(), mask, return_mask=True) else: # real image if np.random.rand() < cfg.INPUT.CHANGE_BG_PROB: log_first_n(logging.WARNING, "replace bg for real", n=10) assert "segmentation" in dataset_dict["inst_infos"] mask = cocosegm2mask( dataset_dict["inst_infos"]["segmentation"], im_H_ori, im_W_ori) image, mask_trunc = self.replace_bg(image.copy(), mask, return_mask=True) else: mask_trunc = None # NOTE: maybe add or change color augment here =================================== if self.split == "train" and self.color_aug_prob > 0 and self.color_augmentor is not None: if np.random.rand() < self.color_aug_prob: if cfg.INPUT.COLOR_AUG_SYN_ONLY and img_type not in ["real"]: image = self._color_aug(image, self.color_aug_type) else: image = self._color_aug(image, self.color_aug_type) # other transforms (mainly geometric ones); # for 6d pose task, flip is now allowed in general except for some 2d keypoints methods image, transforms = T.apply_augmentations(self.augmentation, image) im_H, im_W = image_shape = image.shape[:2] # h, w # NOTE: scale camera intrinsic if necessary ================================ scale_x = im_W / im_W_ori scale_y = im_H / im_H_ori # NOTE: generally scale_x should be equal to scale_y if "cam" in dataset_dict: if im_W != im_W_ori or im_H != im_H_ori: dataset_dict["cam"][0] *= scale_x dataset_dict["cam"][1] *= scale_y K = dataset_dict["cam"].astype("float32") dataset_dict["cam"] = torch.as_tensor(K) input_res = cfg.MODEL.CDPN.BACKBONE.INPUT_RES out_res = cfg.MODEL.CDPN.BACKBONE.OUTPUT_RES # CHW -> HWC coord_2d = get_2d_coord_np(im_W, im_H, low=0, high=1).transpose(1, 2, 0) ################################################################################# if self.split != "train": # don't load annotations at test time test_bbox_type = cfg.TEST.TEST_BBOX_TYPE if test_bbox_type == "gt": bbox_key = "bbox" else: bbox_key = f"bbox_{test_bbox_type}" assert not self.flatten, "Do not use flattened dicts for test!" 
        # here get batched rois
        roi_infos = {}
        # yapf: disable
        roi_keys = ["scene_im_id", "file_name", "cam", "im_H", "im_W",
                    "roi_img", "inst_id", "roi_coord_2d", "roi_cls", "score", "roi_extent",
                    bbox_key, "bbox_mode", "bbox_center", "roi_wh",
                    "scale", "resize_ratio", "model_info",
                    ]
        for _key in roi_keys:
            roi_infos[_key] = []
        # yapf: enable
        # TODO: how to handle image without detections
        #   filter those when loading annotations or detections; implement a function for this
        # "annotations" means detections
        for inst_i, inst_infos in enumerate(dataset_dict["annotations"]):
            # inherent image-level infos
            roi_infos["scene_im_id"].append(dataset_dict["scene_im_id"])
            roi_infos["file_name"].append(dataset_dict["file_name"])
            roi_infos["im_H"].append(im_H)
            roi_infos["im_W"].append(im_W)
            roi_infos["cam"].append(dataset_dict["cam"].cpu().numpy())

            # roi-level infos
            roi_infos["inst_id"].append(inst_i)
            roi_infos["model_info"].append(inst_infos["model_info"])

            roi_cls = inst_infos["category_id"]
            roi_infos["roi_cls"].append(roi_cls)
            roi_infos["score"].append(inst_infos["score"])

            # extent
            roi_extent = self._get_extents(dataset_name)[roi_cls]
            roi_infos["roi_extent"].append(roi_extent)

            bbox = BoxMode.convert(inst_infos[bbox_key], inst_infos["bbox_mode"], BoxMode.XYXY_ABS)
            bbox = np.array(transforms.apply_box([bbox])[0])
            roi_infos[bbox_key].append(bbox)
            roi_infos["bbox_mode"].append(BoxMode.XYXY_ABS)
            x1, y1, x2, y2 = bbox
            bbox_center = np.array([0.5 * (x1 + x2), 0.5 * (y1 + y2)])
            bw = max(x2 - x1, 1)
            bh = max(y2 - y1, 1)
            scale = max(bh, bw) * cfg.INPUT.DZI_PAD_SCALE
            scale = min(scale, max(im_H, im_W)) * 1.0

            roi_infos["bbox_center"].append(bbox_center.astype("float32"))
            roi_infos["scale"].append(scale)
            roi_infos["roi_wh"].append(np.array([bw, bh], dtype=np.float32))
            roi_infos["resize_ratio"].append(out_res / scale)

            # CHW, float32 tensor
            # roi_image
            roi_img = crop_resize_by_warp_affine(
                image, bbox_center, scale, input_res, interpolation=cv2.INTER_LINEAR
            ).transpose(2, 0, 1)
            roi_img = self.normalize_image(cfg, roi_img)
            roi_infos["roi_img"].append(roi_img.astype("float32"))

            # roi_coord_2d
            roi_coord_2d = crop_resize_by_warp_affine(
                coord_2d, bbox_center, scale, out_res, interpolation=cv2.INTER_LINEAR
            ).transpose(2, 0, 1)  # HWC -> CHW
            roi_infos["roi_coord_2d"].append(roi_coord_2d.astype("float32"))

        for _key in roi_keys:
            if _key in ["roi_img", "roi_coord_2d"]:
                dataset_dict[_key] = torch.as_tensor(roi_infos[_key]).contiguous()
            elif _key in ["model_info", "scene_im_id", "file_name"]:
                # cannot be converted to tensor
                dataset_dict[_key] = roi_infos[_key]
            else:
                dataset_dict[_key] = torch.tensor(roi_infos[_key])

        return dataset_dict

    #######################################################################################
    # NOTE: currently assume flattened dicts for train
    assert self.flatten, "Only support flattened dicts for train now"
    inst_infos = dataset_dict.pop("inst_infos")
    dataset_dict["roi_cls"] = roi_cls = inst_infos["category_id"]

    # extent
    roi_extent = self._get_extents(dataset_name)[roi_cls]
    dataset_dict["roi_extent"] = torch.tensor(roi_extent, dtype=torch.float32)

    # load xyz =======================================================
    xyz_info = mmcv.load(inst_infos["xyz_path"])
    x1, y1, x2, y2 = xyz_info["xyxy"]
    # float16 does not affect performance (classification/regression)
    xyz_crop = xyz_info["xyz_crop"]
    xyz = np.zeros((im_H, im_W, 3), dtype=np.float32)
    xyz[y1:y2 + 1, x1:x2 + 1, :] = xyz_crop
    # NOTE: full mask
    mask_obj = ((xyz[:, :, 0] != 0) | (xyz[:, :, 1] != 0) | (xyz[:, :, 2] != 0)).astype(bool).astype(np.float32)
    if cfg.INPUT.SMOOTH_XYZ:
        xyz = self.smooth_xyz(xyz)

    if cfg.TRAIN.VIS:
        xyz = self.smooth_xyz(xyz)

    # override bbox info using xyz_infos
    inst_infos["bbox"] = [x1, y1, x2, y2]
    inst_infos["bbox_mode"] = BoxMode.XYXY_ABS

    # USER: Implement additional transformations if you have other types of data
    # inst_infos.pop("segmentation")  # NOTE: use mask from xyz
    anno = transform_instance_annotations(inst_infos, transforms, image_shape, keypoint_hflip_indices=None)

    # augment bbox ===================================================
    bbox_xyxy = anno["bbox"]
    bbox_center, scale = self.aug_bbox(cfg, bbox_xyxy, im_H, im_W)
    bw = max(bbox_xyxy[2] - bbox_xyxy[0], 1)
    bh = max(bbox_xyxy[3] - bbox_xyxy[1], 1)

    # CHW, float32 tensor
    # roi_image ------------------------------------
    roi_img = crop_resize_by_warp_affine(
        image, bbox_center, scale, input_res, interpolation=cv2.INTER_LINEAR
    ).transpose(2, 0, 1)
    roi_img = self.normalize_image(cfg, roi_img)

    # roi_coord_2d ----------------------------------------------------
    roi_coord_2d = crop_resize_by_warp_affine(
        coord_2d, bbox_center, scale, out_res, interpolation=cv2.INTER_LINEAR
    ).transpose(2, 0, 1)

    # roi_mask ---------------------------------------
    # (mask_trunc < mask_visib < mask_obj)
    mask_visib = anno["segmentation"].astype("float32") * mask_obj
    if mask_trunc is None:
        mask_trunc = mask_visib
    else:
        mask_trunc = mask_visib * mask_trunc.astype("float32")

    if cfg.TRAIN.VIS:
        mask_xyz_interp = cv2.INTER_LINEAR
    else:
        mask_xyz_interp = cv2.INTER_NEAREST

    # maybe truncated mask (true mask for rgb)
    roi_mask_trunc = crop_resize_by_warp_affine(
        mask_trunc[:, :, None], bbox_center, scale, out_res, interpolation=mask_xyz_interp
    )

    # use original visible mask to calculate xyz loss (try full obj mask?)
    roi_mask_visib = crop_resize_by_warp_affine(
        mask_visib[:, :, None], bbox_center, scale, out_res, interpolation=mask_xyz_interp
    )

    roi_mask_obj = crop_resize_by_warp_affine(
        mask_obj[:, :, None], bbox_center, scale, out_res, interpolation=mask_xyz_interp
    )

    # roi_xyz ----------------------------------------------------
    roi_xyz = crop_resize_by_warp_affine(xyz, bbox_center, scale, out_res, interpolation=mask_xyz_interp)

    # region label
    if r_head_cfg.NUM_REGIONS > 1:
        fps_points = self._get_fps_points(dataset_name)[roi_cls]
        roi_region = xyz_to_region(roi_xyz, fps_points)  # HW
        dataset_dict["roi_region"] = torch.as_tensor(roi_region.astype(np.int32)).contiguous()

    roi_xyz = roi_xyz.transpose(2, 0, 1)  # HWC --> CHW
    # normalize xyz to [0, 1] using extent
    roi_xyz[0] = roi_xyz[0] / roi_extent[0] + 0.5
    roi_xyz[1] = roi_xyz[1] / roi_extent[1] + 0.5
    roi_xyz[2] = roi_xyz[2] / roi_extent[2] + 0.5

    if ("CE" in r_head_cfg.XYZ_LOSS_TYPE) or ("cls" in cfg.MODEL.CDPN.NAME):  # convert target to int for cls
        # assumes roi_xyz has been normalized to [0, 1]
        roi_xyz_bin = np.zeros_like(roi_xyz)
        roi_x_norm = roi_xyz[0]
        roi_x_norm[roi_x_norm < 0] = 0  # clip
        roi_x_norm[roi_x_norm > 0.999999] = 0.999999
        roi_xyz_bin[0] = np.asarray(roi_x_norm * r_head_cfg.XYZ_BIN, dtype=np.uint8)  # [0, BIN-1]
        roi_y_norm = roi_xyz[1]
        roi_y_norm[roi_y_norm < 0] = 0
        roi_y_norm[roi_y_norm > 0.999999] = 0.999999
        roi_xyz_bin[1] = np.asarray(roi_y_norm * r_head_cfg.XYZ_BIN, dtype=np.uint8)
        roi_z_norm = roi_xyz[2]
        roi_z_norm[roi_z_norm < 0] = 0
        roi_z_norm[roi_z_norm > 0.999999] = 0.999999
        roi_xyz_bin[2] = np.asarray(roi_z_norm * r_head_cfg.XYZ_BIN, dtype=np.uint8)

        # the last bin is for bg
        roi_masks = {"trunc": roi_mask_trunc, "visib": roi_mask_visib, "obj": roi_mask_obj}
        roi_mask_xyz = roi_masks[r_head_cfg.XYZ_LOSS_MASK_GT]
        roi_xyz_bin[0][roi_mask_xyz == 0] = r_head_cfg.XYZ_BIN
        roi_xyz_bin[1][roi_mask_xyz == 0] = r_head_cfg.XYZ_BIN
        roi_xyz_bin[2][roi_mask_xyz == 0] = r_head_cfg.XYZ_BIN

        if "CE" in r_head_cfg.XYZ_LOSS_TYPE:
            dataset_dict["roi_xyz_bin"] = torch.as_tensor(roi_xyz_bin.astype("uint8")).contiguous()
        if "/" in r_head_cfg.XYZ_LOSS_TYPE and len(r_head_cfg.XYZ_LOSS_TYPE.split("/")[1]) > 0:
            dataset_dict["roi_xyz"] = torch.as_tensor(roi_xyz.astype("float32")).contiguous()
    else:
        dataset_dict["roi_xyz"] = torch.as_tensor(roi_xyz.astype("float32")).contiguous()

    # pose targets ----------------------------------------------------------------------
    pose = inst_infos["pose"]
    allo_pose = egocentric_to_allocentric(pose)
    quat = inst_infos["quat"]
    allo_quat = mat2quat(allo_pose[:3, :3])

    # ====== actually not needed ==========
    if pnp_net_cfg.ROT_TYPE == "allo_quat":
        dataset_dict["allo_quat"] = torch.as_tensor(allo_quat.astype("float32"))
    elif pnp_net_cfg.ROT_TYPE == "ego_quat":
        dataset_dict["ego_quat"] = torch.as_tensor(quat.astype("float32"))
    # rot6d
    elif pnp_net_cfg.ROT_TYPE == "ego_rot6d":
        dataset_dict["ego_rot6d"] = torch.as_tensor(mat_to_ortho6d_np(pose[:3, :3].astype("float32")))
    elif pnp_net_cfg.ROT_TYPE == "allo_rot6d":
        dataset_dict["allo_rot6d"] = torch.as_tensor(mat_to_ortho6d_np(allo_pose[:3, :3].astype("float32")))
    # log quat
    elif pnp_net_cfg.ROT_TYPE == "ego_log_quat":
        dataset_dict["ego_log_quat"] = quaternion_lf.qlog(torch.as_tensor(quat.astype("float32"))[None])[0]
    elif pnp_net_cfg.ROT_TYPE == "allo_log_quat":
        dataset_dict["allo_log_quat"] = quaternion_lf.qlog(torch.as_tensor(allo_quat.astype("float32"))[None])[0]
    # lie vec
    elif pnp_net_cfg.ROT_TYPE == "ego_lie_vec":
        dataset_dict["ego_lie_vec"] = lie_algebra.rot_to_lie_vec(
            torch.as_tensor(pose[:3, :3].astype("float32"))[None])[0]
    elif pnp_net_cfg.ROT_TYPE == "allo_lie_vec":
        dataset_dict["allo_lie_vec"] = lie_algebra.rot_to_lie_vec(
            torch.as_tensor(allo_pose[:3, :3].astype("float32"))[None])[0]
    else:
        raise ValueError(f"Unknown rot type: {pnp_net_cfg.ROT_TYPE}")
    dataset_dict["ego_rot"] = torch.as_tensor(pose[:3, :3].astype("float32"))
    dataset_dict["trans"] = torch.as_tensor(inst_infos["trans"].astype("float32"))

    dataset_dict["roi_points"] = torch.as_tensor(self._get_model_points(dataset_name)[roi_cls].astype("float32"))
    dataset_dict["sym_info"] = self._get_sym_infos(dataset_name)[roi_cls]

    dataset_dict["roi_img"] = torch.as_tensor(roi_img.astype("float32")).contiguous()
    dataset_dict["roi_coord_2d"] = torch.as_tensor(roi_coord_2d.astype("float32")).contiguous()

    dataset_dict["roi_mask_trunc"] = torch.as_tensor(roi_mask_trunc.astype("float32")).contiguous()
    dataset_dict["roi_mask_visib"] = torch.as_tensor(roi_mask_visib.astype("float32")).contiguous()
    dataset_dict["roi_mask_obj"] = torch.as_tensor(roi_mask_obj.astype("float32")).contiguous()

    dataset_dict["bbox_center"] = torch.as_tensor(bbox_center, dtype=torch.float32)
    dataset_dict["scale"] = scale
    dataset_dict["bbox"] = anno["bbox"]  # NOTE: original bbox
    dataset_dict["roi_wh"] = torch.as_tensor(np.array([bw, bh], dtype=np.float32))
    dataset_dict["resize_ratio"] = resize_ratio = out_res / scale

    # scale-invariant translation target: offset of the object center relative to the
    # box center (normalized by box size), plus depth divided by the resize ratio
    z_ratio = inst_infos["trans"][2] / resize_ratio
    obj_center = anno["centroid_2d"]
    delta_c = obj_center - bbox_center
    dataset_dict["trans_ratio"] = torch.as_tensor([delta_c[0] / bw, delta_c[1] / bh, z_ratio]).to(torch.float32)

    return dataset_dict
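

# ------------------------------------------------------------------------------------
# Every RoI above (image, masks, 2D coords, xyz) is cut out via
# crop_resize_by_warp_affine. The helper below is a minimal sketch under assumptions
# (the function name with the `_sketch` suffix is hypothetical; this is not the
# repo's exact implementation): a square window of side `scale` centered at `center`
# is mapped onto an output_size x output_size patch in a single cv2.warpAffine call,
# so cropping and resizing cost one interpolation pass and stay geometrically
# consistent across all the per-RoI tensors.
def crop_resize_by_warp_affine_sketch(img, center, scale, output_size, interpolation=cv2.INTER_LINEAR):
    cx, cy = float(center[0]), float(center[1])
    s = float(scale)
    # Three corners of the square crop window in the source image ...
    src = np.float32([
        [cx - 0.5 * s, cy - 0.5 * s],  # top-left
        [cx + 0.5 * s, cy - 0.5 * s],  # top-right
        [cx - 0.5 * s, cy + 0.5 * s],  # bottom-left
    ])
    # ... and where those corners land in the output patch.
    dst = np.float32([[0, 0], [output_size - 1, 0], [0, output_size - 1]])
    M = cv2.getAffineTransform(src, dst)
    return cv2.warpAffine(img, M, (int(output_size), int(output_size)), flags=interpolation)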
def _original_call(self, dataset_dict):
    """
    Modified from detectron2's original __call__ in DatasetMapper
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below

    image = self._read_image(dataset_dict, format=self.img_format)
    if not self.backfill_size:
        utils.check_image_size(dataset_dict, image)

    image, dataset_dict = self._custom_transform(image, dataset_dict)

    inputs = AugInput(image=image)
    if "annotations" not in dataset_dict:
        transforms = AugmentationList(
            ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens
        )(inputs)
        image = inputs.image
    else:
        # pass additional arguments, will only be used when the Augmentation
        # takes `annotations` as input
        inputs.annotations = dataset_dict["annotations"]
        # Crop around an instance if there are instances in the image.
        if self.crop_gen:
            crop_tfm = utils.gen_crop_transform_with_instance(
                self.crop_gen.get_crop_size(image.shape[:2]),
                image.shape[:2],
                np.random.choice(dataset_dict["annotations"]),
            )
            inputs.image = crop_tfm.apply_image(image)
        transforms = AugmentationList(self.tfm_gens)(inputs)
        image = inputs.image
        if self.crop_gen:
            transforms = crop_tfm + transforms

    # Cache identical transforms in dataset_dict for subclass mappers
    # TODO T122215878 Find more explicit way to expose transforms used
    dataset_dict["transforms"] = transforms

    image_shape = image.shape[:2]  # h, w
    if image.ndim == 2:
        image = np.expand_dims(image, 2)
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
    # Can use uint8 if it turns out to be slow some day

    if self.load_proposals:
        utils.transform_proposals(
            dataset_dict,
            image_shape,
            transforms,
            proposal_topk=self.proposal_topk,
            min_box_size=self.proposal_min_box_size,
        )

    if not self.is_train:
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)

        annos = [
            utils.transform_instance_annotations(
                obj,
                transforms,
                image_shape,
                keypoint_hflip_indices=self.keypoint_hflip_indices,
            )
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(annos, image_shape, mask_format=self.mask_format)

        # Create a tight bounding box from masks, useful when image is cropped
        if self.crop_gen and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    if "sem_seg_file_name" in dataset_dict:
        sem_seg_gt = read_sem_seg_file_with_prefetch(
            dataset_dict.pop("sem_seg_file_name"),
            prefetched=dataset_dict.get(PREFETCHED_SEM_SEG_FILE_NAME, None),
        )
        if len(sem_seg_gt.shape) > 2:
            sem_seg_gt = sem_seg_gt.squeeze(2)
        sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
        sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
        dataset_dict["sem_seg"] = sem_seg_gt

    # extend standard D2 semantic segmentation to support multiple segmentation
    # files, each file can represent a class
    if "multi_sem_seg_file_names" in dataset_dict:
        raise NotImplementedError()

    if "_post_process_" in dataset_dict:
        proc_func = dataset_dict.pop("_post_process_")
        dataset_dict = proc_func(dataset_dict)

    return dataset_dict
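

# ------------------------------------------------------------------------------------
# A small self-contained illustration (assuming detectron2 is installed; the demo
# function name, array sizes, and augmentation choices are made up) of the
# AugInput / AugmentationList pattern that _original_call relies on: the augmentation
# list mutates the AugInput in place and returns the concrete TransformList, which
# can be replayed deterministically on other data of the same image. That replay
# property is why the mapper caches it under dataset_dict["transforms"] for
# subclass mappers.
def _aug_input_pattern_demo():
    import numpy as np
    from detectron2.data import transforms as T

    image = (np.random.rand(480, 640, 3) * 255).astype(np.uint8)
    aug_input = T.AugInput(image)
    tfms = T.AugmentationList([
        T.ResizeShortestEdge(short_edge_length=256, max_size=512),
        T.RandomFlip(prob=0.5),
    ])(aug_input)
    augmented = aug_input.image          # mutated in place by the augmentation list
    replayed = tfms.apply_image(image)   # replay the *same* sampled transforms
    assert augmented.shape == replayed.shape
    return tfms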
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    if "annotations" not in dataset_dict:
        image, transforms = T.apply_transform_gens(
            ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image
        )
    else:
        # Crop around an instance if there are instances in the image.
        # USER: Remove if you don't use cropping
        if self.crop_gen:
            crop_tfm = utils.gen_crop_transform_with_instance(
                self.crop_gen.get_crop_size(image.shape[:2]),
                image.shape[:2],
                np.random.choice(dataset_dict["annotations"]),
            )
            image = crop_tfm.apply_image(image)
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        if self.crop_gen:
            transforms = crop_tfm + transforms

    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

    # USER: Remove if you don't use pre-computed proposals.
    # Most users would not need this feature.
    if self.load_proposals:
        utils.transform_proposals(
            dataset_dict,
            image_shape,
            transforms,
            proposal_topk=self.proposal_topk,
            min_box_size=self.proposal_min_box_size,
        )

    # HACK Keep annotations for test
    # if not self.is_train:
    #     # USER: Modify this if you want to keep them for some reason.
    #     dataset_dict.pop("annotations", None)
    #     dataset_dict.pop("sem_seg_file_name", None)
    #     return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
            )
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.mask_format
        )
        # Create a tight bounding box from masks, useful when image is cropped
        if self.crop_gen and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    return dataset_dict
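

# ------------------------------------------------------------------------------------
# The mappers above tighten gt_boxes from gt_masks after instance-centric cropping,
# because a box clipped by the crop can be looser than the surviving mask pixels.
# A minimal demo of the underlying call (assuming detectron2; the demo function name
# and mask content are made up): BitMasks.get_bounding_boxes() -- the same method
# invoked on instances.gt_masks -- recovers the tight extent directly from the mask.
def _tight_box_from_mask_demo():
    import torch
    from detectron2.structures import BitMasks

    masks = torch.zeros(1, 100, 100, dtype=torch.bool)
    masks[0, 30:60, 40:80] = True  # visible part of an object after a crop
    tight = BitMasks(masks).get_bounding_boxes()
    # tight.tensor == [[40., 30., 80., 60.]] in XYXY_ABS format
    return tight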