Example #1
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        if self.crop_gen is None:
            image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        else:
            if np.random.rand() > 0.5:
                image, transforms = T.apply_transform_gens(self.tfm_gens, image)
            else:
                image, transforms = T.apply_transform_gens(
                    self.tfm_gens[:-1] + self.crop_gen + self.tfm_gens[-1:], image
                )

        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

        if not self.is_train:
            # USER: Modify this if you want to keep them for some reason.
            dataset_dict.pop("annotations", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                # anno.pop("segmentation", None) # TODO #1 keep segmentation annotation
                anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(obj, transforms, image_shape)
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(annos, image_shape)
            dataset_dict["instances"] = utils.filter_empty_instances(instances)
        return dataset_dict
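
A mapper like the __call__ above is usually hooked into the training data loader. A minimal usage sketch, assuming the enclosing class is named CustomDatasetMapper and its constructor takes (cfg, is_train) like the default DatasetMapper:

from detectron2.data import build_detection_train_loader

def build_train_loader(cfg):
    # CustomDatasetMapper is a hypothetical name for the mapper class shown above.
    mapper = CustomDatasetMapper(cfg, is_train=True)
    # build_detection_train_loader accepts any callable that maps one dataset dict.
    return build_detection_train_loader(cfg, mapper=mapper)
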
    def test_transform_simple_annotation(self):
        transforms = T.TransformList([T.HFlipTransform(400)])
        anno = {
            "bbox": np.asarray([10, 10, 200, 300]),
            "bbox_mode": BoxMode.XYXY_ABS,
            "category_id": 3,
            "segmentation": [[10, 10, 100, 100, 100, 10], [150, 150, 200, 150, 200, 200]],
        }

        output = detection_utils.transform_instance_annotations(anno, transforms, (400, 400))
        self.assertTrue(np.allclose(output["bbox"], [200, 10, 390, 300]))
        self.assertEqual(len(output["segmentation"]), len(anno["segmentation"]))
        self.assertTrue(np.allclose(output["segmentation"][0], [390, 10, 300, 100, 300, 10]))

        detection_utils.annotations_to_instances([output, output], (400, 400))
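
The expected values in this test follow from how a horizontal flip maps x-coordinates: with image width 400, x becomes 400 - x, so the box corners x=10 and x=200 map to 390 and 200 and are re-sorted into [200, 10, 390, 300]. A quick standalone check of that arithmetic, using the same transform API:

import numpy as np
from detectron2.data import transforms as T

flip = T.HFlipTransform(400)  # width = 400, so x -> 400 - x
print(flip.apply_box(np.array([[10, 10, 200, 300]], dtype=np.float64)))
# expected output: [[200. 10. 390. 300.]]
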
Example #3
def mapper(dataset_dict):
    # Implement a mapper, similar to the default DatasetMapper, but with your own customizations
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    pdb.set_trace()
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    image, transforms = T.apply_transform_gens([T.Resize((800, 800))], image)
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
Example #4
def custom_mapper(dataset_dict, transform_list):
    dataset_dict = copy.deepcopy(
        dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format="BGR")

    image, transforms = T.apply_transform_gens(transform_list, image)
    dataset_dict["image"] = torch.as_tensor(
        image.transpose(2, 0, 1).astype("float32"))

    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
    ]
    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
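
Because custom_mapper takes an extra transform_list argument, it cannot be passed to the data loader directly. One way is to bind the argument first (a sketch; the transform list and cfg are placeholders):

import functools
from detectron2.data import build_detection_train_loader
from detectron2.data import transforms as T

transform_list = [T.Resize((800, 800)), T.RandomFlip(prob=0.5)]
mapper = functools.partial(custom_mapper, transform_list=transform_list)
# cfg is a detectron2 CfgNode configured elsewhere.
train_loader = build_detection_train_loader(cfg, mapper=mapper)
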
    def test_crop(self):
        transforms = T.TransformList([T.CropTransform(300, 300, 10, 10)])
        keypoints = np.random.rand(17, 3) * 50 + 15
        keypoints[:, 2] = 2
        anno = {
            "bbox": np.asarray([10, 10, 200, 300]),
            "bbox_mode": BoxMode.XYXY_ABS,
            "keypoints": keypoints,
        }

        output = detection_utils.transform_instance_annotations(
            copy.deepcopy(anno), transforms, (400, 400)
        )
        self.assertTrue((output["bbox"] == np.asarray([-290, -290, -100, 0])).all())
        # keypoints are no longer visible
        self.assertTrue((output["keypoints"][:, 2] == 0).all())
Example #6
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        image = utils.read_image(dataset_dict["file_name"],
                                 format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        image, transforms = T.apply_transform_gens(self.augmentation, image)
        image_shape = image.shape[:2]  # h, w
        dataset_dict["image"] = torch.as_tensor(
            image.transpose(2, 0, 1).astype("float32"))

        if not self.is_train:
            dataset_dict.pop("annotations", None)
            return dataset_dict

        annos = [
            utils.transform_instance_annotations(obj, transforms,
                                                 image.shape[:2])
            for obj in dataset_dict.pop("annotations")
        ]
        dataset_dict["instances"] = utils.annotations_to_instances(
            annos, image.shape[:2])

        #         # USER: Implement additional transformations if you have other types of data
        #         # USER: Don't call transpose_densepose if you don't need
        #         annos = [
        #             self._transform_densepose(
        #                 utils.transform_instance_annotations(
        #                     obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
        #                 ),
        #                 transforms,
        #             )
        #             for obj in dataset_dict.pop("annotations")
        #             if obj.get("iscrowd", 0) == 0
        #         ]

        #         instances = utils.annotations_to_instances(annos, image_shape, mask_format="bitmask")

        #         dataset_dict["instances"] = instances[instances.gt_boxes.nonempty()]
        return dataset_dict
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(
        dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"],
                             format=self.image_format)
    utils.check_image_size(dataset_dict, image)

    aug_input = T.AugInput(image)
    transforms = self.augmentations(aug_input)
    image = aug_input.image

    image_shape = image.shape[:2]  # h, w
    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(
        np.ascontiguousarray(image.transpose(2, 0, 1)))

    if "annotations" in dataset_dict:
        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(
                obj,
                transforms,
                image_shape,
                keypoint_hflip_indices=self.keypoint_hflip_indices)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.instance_mask_format)

        # After transforms such as cropping are applied, the bounding box may no longer
        # tightly bound the object. As an example, imagine a triangle object
        # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
        # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
        # the intersection of original bounding box and the cropping box.

        dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
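
The self.augmentations used with T.AugInput above is not shown. A plausible construction (an assumption, mirroring how detectron2's default DatasetMapper builds its augmentations from the config) wraps detection_utils.build_augmentation in an AugmentationList; in the mapper's __init__ the result would be assigned to self.augmentations:

from detectron2.data import detection_utils as utils
from detectron2.data import transforms as T

def build_mapper_augmentations(cfg, is_train=True):
    # build_augmentation returns e.g. [ResizeShortestEdge(...), RandomFlip()] based on cfg.INPUT;
    # AugmentationList makes the whole pipeline callable on a T.AugInput.
    return T.AugmentationList(utils.build_augmentation(cfg, is_train))
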
Example #8
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        image_shape = image.shape[:2]  # h, w
        dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))

        if not self.is_train:
            dataset_dict.pop("annotations", None)
            return dataset_dict

        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        # USER: Don't call transpose_densepose if you don't need
        annos = [
            self._transform_densepose(
                utils.transform_instance_annotations(
                    obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
                ),
                transforms,
            )
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(annos, image_shape)

        if len(annos) and "densepose" in annos[0]:
            gt_densepose = [obj["densepose"] for obj in annos]
            instances.gt_densepose = DensePoseList(gt_densepose, instances.gt_boxes, image_shape)

        dataset_dict["instances"] = instances[instances.gt_boxes.nonempty()]
        return dataset_dict
Example #9
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of ONE video, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept

        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below

        if self.is_train:
            # TODO: sample a fixed number of frames (left unimplemented in the
            # original; all frames are used here so the code below still runs)
            pass

        new_dataset_dict = []
        for item in dataset_dict:
            image = utils.read_image(item["filename"], format=self.img_format)
            utils.check_image_size(item, image)

            # TODO: SSD random crop
            image, transforms = T.apply_transform_gens(
                self.tfm_gens[:-1] + self.crop_gen + self.tfm_gens[-1:], image
            )

            image_shape = image.shape[:2]  # h, w

            # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
            # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
            # Therefore it's important to use torch.Tensor.
            image = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
            sample = {"image": image}

            if not self.is_train:
                new_dataset_dict.append(sample)
                continue

            # USER: Implement additional transformations if you have other types of data
            boxes = [
                utils.transform_instance_annotations(box, transforms, image_shape)
                for box in item["boxes"]
            ]
            # TODO: build an Instances object from the transformed boxes; as a
            # placeholder (not the original author's code) use annotations_to_instances
            instances = utils.annotations_to_instances(boxes, image_shape)
            sample["instances"] = instances
            new_dataset_dict.append(sample)

        return new_dataset_dict
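
The frame-sampling step above is left as a TODO. One simple possibility (an assumption, not the original author's code) is to sample a fixed number of frames uniformly at random while keeping temporal order:

import random

def sample_frames(frames, num_frames):
    """Return up to num_frames items from the video's frame list, preserving order."""
    if len(frames) <= num_frames:
        return list(frames)
    idxs = sorted(random.sample(range(len(frames)), num_frames))
    return [frames[i] for i in idxs]
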
Example #10
    def __call__(self, dataset_dict):
        dataset_dict = copy.deepcopy(dataset_dict)
        image = utils.read_image(dataset_dict["file_name"], format="BGR")
        aug_input = T.AugInput(image)
        transforms = self.augmentations(aug_input)
        image = aug_input.image

        image_shape = image.shape[:2]  #h, w
        dataset_dict["image"] = torch.as_tensor(
            image.transpose(2, 0, 1).astype("float32"))
        annos = [
            utils.transform_instance_annotations(obj, transforms, image_shape)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(annos, image_shape)
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

        return dataset_dict
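
To train with a mapper like the one above, the usual pattern is to override build_train_loader on DefaultTrainer (a sketch; MyMapper stands in for whatever class wraps the __call__ shown above):

from detectron2.data import build_detection_train_loader
from detectron2.engine import DefaultTrainer

class MapperTrainer(DefaultTrainer):
    @classmethod
    def build_train_loader(cls, cfg):
        # MyMapper is a placeholder for the mapper class defined above.
        return build_detection_train_loader(cfg, mapper=MyMapper(cfg))
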
Example #11
def customMapper(dataset_dict):
  dataset_dict = copy.deepcopy(dataset_dict)
  image = utils.read_image(dataset_dict["file_name"], format="BGR")

  transform_list = [
                    T.Resize((600, 800)),
                    T.RandomFlip(prob=0.6, horizontal=True, vertical=False),
                    T.RandomFlip(prob=0.6, horizontal=False, vertical=True),
                    ]
  image, transforms = T.apply_transform_gens(transform_list, image)
  dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
  annos = [
      utils.transform_instance_annotations(obj, transforms, image.shape[:2])
      for obj in dataset_dict.pop("annotations")
      if obj.get("iscrowd", 0) == 0
  ]
  instances = utils.annotations_to_instances(annos, image.shape[:2])
  dataset_dict["instances"] = utils.filter_empty_instances(instances)
  return dataset_dict
 def __call__(self, dataset_dict):
     dataset_dict = copy.deepcopy(dataset_dict)
     # it will be modified by code below
     # can use other ways to read image
     image = utils.read_image(dataset_dict["file_name"], format="BGR")
     # See "Data Augmentation" tutorial for details usage
     auginput = T.AugInput(image)
     transform = T.Resize((800, 800))(auginput)
     print(f'resized image {dataset_dict["file_name"]}')
     image = torch.from_numpy(auginput.image.transpose(2, 0, 1))
     annos = [
         utils.transform_instance_annotations(annotation, [transform],
                                              image.shape[1:])
         for annotation in dataset_dict.pop("annotations")
     ]
     return {
         # create the format that the model expects
         "image": image,
         "instances":
         utils.annotations_to_instances(annos, image.shape[1:])
     }
    def test_transform_RLE_resize(self):
        transforms = T.TransformList(
            [T.HFlipTransform(400), T.ScaleTransform(300, 400, 400, 400, "bilinear")]
        )
        mask = np.zeros((300, 400), order="F").astype("uint8")
        mask[:, :200] = 1

        anno = {
            "bbox": np.asarray([10, 10, 200, 300]),
            "bbox_mode": BoxMode.XYXY_ABS,
            "segmentation": mask_util.encode(mask[:, :, None])[0],
            "category_id": 3,
        }
        output = detection_utils.transform_instance_annotations(
            copy.deepcopy(anno), transforms, (400, 400)
        )

        inst = detection_utils.annotations_to_instances(
            [output, output], (400, 400), mask_format="bitmask"
        )
        self.assertTrue(isinstance(inst.gt_masks, BitMasks))
def custom_mapper(input_dict):
    dataset_dict = copy.deepcopy(
        input_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    transform_list = [
        T.Resize((1200, 1200)),
        T.RandomFlip(prob=0.6, horizontal=True, vertical=False),
        T.RandomFlip(prob=0.6, horizontal=False, vertical=True),
        T.RandomContrast(0.7, 3.2),
        T.RandomBrightness(0.6, 1.8),
    ]
    image, transforms = T.apply_transform_gens(transform_list, image)
    dataset_dict["image"] = torch.as_tensor(
        image.transpose(2, 0, 1).astype("float32"))
    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
    ]
    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
Example #15
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)
        image = utils.read_image(dataset_dict['file_name'],
                                 format=self.img_format)
        utils.check_image_size(dataset_dict, image)
        if self.crop_gen is None:
            image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        elif np.random.rand() > 0.5:
            image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        else:
            image, transforms = T.apply_transform_gens(
                self.tfm_gens[:-1] + self.crop_gen + self.tfm_gens[-1:], image)
        image_shape = image.shape[:2]
        dataset_dict['image'] = paddle.to_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)))
        if not self.is_train:
            dataset_dict.pop('annotations', None)
            return dataset_dict
        if 'annotations' in dataset_dict:
            for anno in dataset_dict['annotations']:
                if not self.mask_on:
                    anno.pop('segmentation', None)
                anno.pop('keypoints', None)
            annos = [
                utils.transform_instance_annotations(obj, transforms,
                                                     image_shape)
                for obj in dataset_dict.pop('annotations')
                if obj.get('iscrowd', 0) == 0
            ]
            instances = utils.annotations_to_instances(annos, image_shape)
            dataset_dict['instances'] = utils.filter_empty_instances(instances)
        return dataset_dict
Example #16
def custom_mapper(dataset_dict):
    dataset_dict = copy.deepcopy(
        dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    transform_list = [
        InvertColors(),
        T.RandomBrightness(0.8, 1.8),
        T.RandomContrast(0.6, 1.3),
        T.RandomSaturation(0.8, 1.4),
        T.RandomLighting(0.7),
    ]
    image, transforms = T.apply_transform_gens(transform_list, image)
    dataset_dict["image"] = torch.as_tensor(
        image.transpose(2, 0, 1).astype("float32"))

    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
    ]
    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
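
InvertColors is not a built-in detectron2 augmentation. A minimal sketch of what such a custom augmentation could look like, assuming 8-bit images and using ColorTransform so only pixel values (not boxes or masks) are affected:

from detectron2.data import transforms as T

class InvertColors(T.Augmentation):
    """Invert pixel values of the image; annotations are left untouched."""

    def get_transform(self, image):
        # ColorTransform wraps a pixel-wise function that is applied to the image only.
        return T.ColorTransform(lambda img: 255 - img)
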
Example #17
def custom_mapper(dataset_dict):
    dataset_dict = copy.deepcopy(dataset_dict)
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    image, transforms = T.apply_transform_gens([
        T.Resize((1920, 1080)),
        T.RandomFlip(0.1),
        T.RandomSaturation(0.9, 1.1),
        T.RandomBrightness(0.9, 1.1),
        T.RandomContrast(0.9, 1.1)
    ], image)

    dataset_dict["image"] = torch.as_tensor(
        image.transpose(2, 0, 1).astype("float32"))

    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2])
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
Example #18
    def test_transform_uncompressed_RLE(self):
        transforms = T.TransformList([T.HFlipTransform(400)])
        mask = np.zeros((300, 400)).astype("uint8")
        mask[:, :200] = 1

        anno = {
            "bbox": np.asarray([10, 10, 200, 300]),
            "bbox_mode": BoxMode.XYXY_ABS,
            "segmentation": binary_mask_to_uncompressed_rle(mask),
            "category_id": 3,
        }
        output = detection_utils.transform_instance_annotations(
            copy.deepcopy(anno), transforms, (300, 400)
        )
        mask = output["segmentation"]
        self.assertTrue((mask[:, 200:] == 1).all())
        self.assertTrue((mask[:, :200] == 0).all())

        inst = detection_utils.annotations_to_instances(
            [output, output], (400, 400), mask_format="bitmask"
        )
        self.assertTrue(isinstance(inst.gt_masks, BitMasks))
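
The binary_mask_to_uncompressed_rle helper used above is not shown. A possible implementation that produces COCO-style uncompressed RLE (run lengths counted in column-major order, starting with the number of zeros):

import numpy as np

def binary_mask_to_uncompressed_rle(mask):
    """Encode an HxW binary mask as uncompressed COCO RLE."""
    mask = np.asarray(mask, dtype=np.uint8)
    counts = []
    last, run = 0, 0
    for px in mask.ravel(order="F"):  # column-major order, as COCO expects
        if px != last:
            counts.append(run)
            last, run = px, 0
        run += 1
    counts.append(run)
    return {"size": list(mask.shape), "counts": counts}
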
Example #19
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        # Implement a mapper, similar to the default DatasetMapper, but with own customizations
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        image = utils.read_image(dataset_dict["file_name"], format="BGR")
        
        # Custom augs to be used while training
        augs = [T.RandomFlip(0.6, horizontal=False, vertical=True)]

        if self.is_train:
            tfm_gens = self.tfm_gens + augs
        else:
            tfm_gens = self.tfm_gens

        logging.getLogger(__name__).info("Original Augmentation: " + str(self.tfm_gens))

        logging.getLogger(__name__).info("Updated Augmentation List: " + str(tfm_gens))

        image, transforms = T.apply_transform_gens(tfm_gens, image)
        dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))

        annos = [
            utils.transform_instance_annotations(obj, transforms, image.shape[:2])
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(annos, image.shape[:2])
        dataset_dict["instances"] = utils.filter_empty_instances(instances)
        dataset_dict["instances"] = filter_small_instances(dataset_dict["instances"], self.min_area, self.max_area)
        return dataset_dict
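
filter_small_instances is not part of detectron2. A plausible implementation (an assumption) that keeps only instances whose box area lies within [min_area, max_area]:

from detectron2.structures import Instances

def filter_small_instances(instances: Instances, min_area: float, max_area: float) -> Instances:
    areas = instances.gt_boxes.area()              # one area per ground-truth box
    keep = (areas >= min_area) & (areas <= max_area)
    return instances[keep]                         # boolean indexing keeps matching instances
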
Example #20
    def _original_call(self, dataset_dict):
        """
        Modified from detectron2's original __call__ in DatasetMapper
        """
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below

        image = self._read_image(dataset_dict, format=self.img_format)
        if not self.backfill_size:
            utils.check_image_size(dataset_dict, image)

        image, dataset_dict = self._custom_transform(image, dataset_dict)

        inputs = AugInput(image=image)
        if "annotations" not in dataset_dict:
            transforms = AugmentationList(
                ([self.crop_gen] if self.crop_gen else []) +
                self.tfm_gens)(inputs)
            image = inputs.image
        else:
            # pass additional arguments, will only be used when the Augmentation
            #   takes `annotations` as input
            inputs.annotations = dataset_dict["annotations"]
            # Crop around an instance if there are instances in the image.
            if self.crop_gen:
                crop_tfm = utils.gen_crop_transform_with_instance(
                    self.crop_gen.get_crop_size(image.shape[:2]),
                    image.shape[:2],
                    np.random.choice(dataset_dict["annotations"]),
                )
                inputs.image = crop_tfm.apply_image(image)
            transforms = AugmentationList(self.tfm_gens)(inputs)
            image = inputs.image
            if self.crop_gen:
                transforms = crop_tfm + transforms

        # Cache identical transforms in dataset_dict for subclass mappers
        # TODO T122215878 Find more explicit way to expose transforms used
        dataset_dict["transforms"] = transforms

        image_shape = image.shape[:2]  # h, w
        if image.ndim == 2:
            image = np.expand_dims(image, 2)
        dataset_dict["image"] = torch.as_tensor(
            image.transpose(2, 0, 1).astype("float32"))
        # Can use uint8 if it turns out to be slow some day

        if self.load_proposals:
            utils.transform_proposals(
                dataset_dict,
                image_shape,
                transforms,
                proposal_topk=self.proposal_topk,
                min_box_size=self.proposal_min_box_size,
            )

        if not self.is_train:
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            for anno in dataset_dict["annotations"]:
                if not self.mask_on:
                    anno.pop("segmentation", None)
                if not self.keypoint_on:
                    anno.pop("keypoints", None)

            annos = [
                utils.transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices,
                ) for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(
                annos, image_shape, mask_format=self.mask_format)
            # Create a tight bounding box from masks, useful when image is cropped
            if self.crop_gen and instances.has("gt_masks"):
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)

        if "sem_seg_file_name" in dataset_dict:
            sem_seg_gt = read_sem_seg_file_with_prefetch(
                dataset_dict.pop("sem_seg_file_name"),
                prefetched=dataset_dict.get(PREFETCHED_SEM_SEG_FILE_NAME,
                                            None),
            )
            if len(sem_seg_gt.shape) > 2:
                sem_seg_gt = sem_seg_gt.squeeze(2)
            sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
            sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
            dataset_dict["sem_seg"] = sem_seg_gt

        # extend standard D2 semantic segmentation to support multiple segmentation
        # files, each file can represent a class
        if "multi_sem_seg_file_names" in dataset_dict:
            raise NotImplementedError()

        if "_post_process_" in dataset_dict:
            proc_func = dataset_dict.pop("_post_process_")
            dataset_dict = proc_func(dataset_dict)

        return dataset_dict
    def __call__(self, dataset_dict):
        assert 'annotations' in dataset_dict, 'only segmentation is handled this time'
        assert not 'sem_seg_file_name' in dataset_dict, 'panoptic segmentation is not performed'

        dataset_dict = copy.deepcopy(dataset_dict)

        image = utils.read_image(dataset_dict['file_name'],
                                 format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        # Brightness, contrast, saturation, cutout
        if self.cont_gen is not None:
            tfm = self.cont_gen.get_transform(image)
            image = tfm.apply_image(image)
        if self.bright_gen is not None:
            tfm = self.bright_gen.get_transform(image)
            image = tfm.apply_image(image)
        if self.sat_gen is not None:
            tfm = self.sat_gen.get_transform(image)
            image = tfm.apply_image(image)
        if self.cutout_gen is not None:
            tfm = self.cutout_gen.get_transform(image)
            image = tfm.apply_image(image)

        # Affine
        if self.rotate_gen is not None:
            rotate_tfm = self.rotate_gen.get_transform(image)
            image = rotate_tfm.apply_image(image)
        if self.shear_gen is not None:
            shear_tfm = self.shear_gen.get_transform(image)
            image = shear_tfm.apply_image(image)
        if self.extent_gen is not None:
            extent_tfm = self.extent_gen.get_transform(image)
            image = extent_tfm.apply_image(image)
        if self.crop_gen is not None:
            crop_tfm = utils.gen_crop_transform_with_instance(
                self.crop_gen.get_crop_size(image.shape[:2]), image.shape[:2],
                np.random.choice(dataset_dict['annotations']))
            image = crop_tfm.apply_image(image)

        image, transforms = T.apply_transform_gens(self.tfm_gens, image)

        if self.crop_gen is not None:
            transforms = crop_tfm + transforms
        if self.extent_gen is not None:
            transforms = extent_tfm + transforms
        if self.shear_gen is not None:
            transforms = shear_tfm + transforms
        if self.rotate_gen is not None:
            transforms = rotate_tfm + transforms

        # At test time the annotations are not needed, so drop them and return
        if not self.is_train:
            dataset_dict.pop('annotations', None)
            dataset_dict.pop('sem_seg_file_name', None)
            return dataset_dict

        image_shape = image.shape[:2]  # h, w
        dataset_dict['image'] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)))

        annos = [
            utils.transform_instance_annotations(obj,
                                                 transforms,
                                                 image_shape,
                                                 keypoint_hflip_indices=None)
            for obj in dataset_dict.pop('annotations')
            if obj.get("iscrowd", 0) == 0
        ]

        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.mask_format)

        # Create bounding boxes from the masks
        if self.crop_gen and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()

        dataset_dict["instances"] = utils.filter_empty_instances(instances)

        return dataset_dict
Example #22
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
        utils.check_image_size(dataset_dict, image)

#         ################################################################################################################
#         print("AutoAugDet:", dataset_dict["file_name"])
#         h, w, c = image.shape
#         if h <= 0 or w <=0:
#             print("Empty image")
#         if self.autoaugdet and "annotations" in dataset_dict:
#             from detectron2.structures.boxes import BoxMode
#             bboxes = []
#             for label in dataset_dict["annotations"]:
#                 assert label['bbox_mode'] == BoxMode.XYWH_ABS
#                 bboxes.append(label['bbox'])
#             # import cv2, random
#             # showimg_in = image.copy()
#             # for box in bboxes:
#             #     cv2.rectangle(showimg_in, (int(box[0]), int(box[1])), (int(box[0] + box[2]), int(box[1] + box[3])),(random.randint(0,255), random.randint(0,255), random.randint(0,255)))
#             try:
#                 image, bboxes = autoaugdet.autoaugdet(image, bboxes, self.autoaugdet)
#             except Exception as  e:
#                 print("AutoAug Error:", e)
#             # showimg_out = image.copy()
#             # for box in bboxes:
#             #     cv2.rectangle(showimg_out, (int(box[0]), int(box[1])), (int(box[0] + box[2]), int(box[1] + box[3])),(random.randint(0,255), random.randint(0,255), random.randint(0,255)))
#             # cv2.imshow("in", showimg_in)
#             # cv2.imshow("out", showimg_out)
#             # cv2.waitKey(0)
#             for i in range(len(bboxes)):
#                 dataset_dict["annotations"][i]['bbox'] = bboxes[i]

#         #################################################################################################       
        
        if "annotations" not in dataset_dict:
            image, transforms = T.apply_transform_gens(
                ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image
            )
        else:
            # Crop around an instance if there are instances in the image.
            # USER: Remove if you don't use cropping
            if self.crop_gen:
                crop_tfm = utils.gen_crop_transform_with_instance(
                    self.crop_gen.get_crop_size(image.shape[:2]),
                    image.shape[:2],
                    np.random.choice(dataset_dict["annotations"]),
                )
                image = crop_tfm.apply_image(image)

            image, transforms = T.apply_transform_gens(self.tfm_gens, image)

            if self.crop_gen:
                transforms = crop_tfm + transforms

        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
        # Can use uint8 if it turns out to be slow some day

        # USER: Remove if you don't use pre-computed proposals.
        if self.load_proposals:
            utils.transform_proposals(
                dataset_dict, image_shape, transforms, self.min_box_side_len, self.proposal_topk
            )

        if not self.is_train:
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.mask_on:
                    anno.pop("segmentation", None)
                if not self.keypoint_on:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(
                    obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
                )
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(
                annos, image_shape, mask_format=self.mask_format
            )
            # Create a tight bounding box from masks, useful when image is cropped
            if self.crop_gen and instances.has("gt_masks"):
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
                
            dataset_dict["instances"] = utils.filter_empty_instances(instances)
            
            # generate heatmaps of keypoints
            #if dataset_dict["instances"].has("gt_keypoints"):
            #
            
            # For segmentation-based detection, transform the instance-level segmentation
            # masks into semantic segmentation maps and contour maps.
            dataset_dict["contours"], dataset_dict["semseg"] = utils.annotations_to_segmaps(annos, self.num_classes, image_shape)
            
            kpts = [obj.get("keypoints", []) for obj in annos]
            map_shape = (image_shape[0], image_shape[1])
            kp_maps, short_offsets = get_keypoint_maps(None, kpts, map_shape)
            dataset_dict["kp_maps"] = kp_maps.transpose(2, 0, 1)
            dataset_dict["short_offsets"] = short_offsets.transpose(2, 0, 1)
            
            ################################################################
#             # visualize the keypoints
#             from detectron2.utils.visualizer import Visualizer
#             from detectron2.data.datasets.builtin_meta import COCO_CATEGORIES
#             from os import path

#             image_rgb = image[..., ::-1]
#             V = Visualizer(image_rgb, dataset_dict)
#             # draw the foreground mask of each object category
#             binary_masks = kp_maps>0.1
#             _, fn = path.split(dataset_dict["file_name"])
#             fn_next, ext = path.splitext(fn)
#             print('Mask size: ', binary_masks.shape)
#             print('Image size: ', image_rgb.shape)
#             assert binary_masks.shape[1]==image_rgb.shape[0], (binary_masks.shape[1], image_rgb.shape[0])
#             assert binary_masks.shape[2]==image_rgb.shape[1], (binary_masks.shape[2], image_rgb.shape[1])
#             assert image_rgb.shape[2]==3, image_rgb.shape[2]
#             bm = binary_masks

#             for i in range(binary_masks.shape[0]):
#                 masked_image = V.draw_binary_mask(
#                     bm[i, :, :].squeeze(), color=None, edge_color='r', alpha=0.5, area_threshold=10
#                 ) # COCO_CATEGORIES[i]["color"]
# #                 filepath = "tmp/" + fn_next + '_' + COCO_CATEGORIES[i]["name"] + '.png'
# #                 masked_image.save(filepath)
#             filepath = "tmp/" + fn_next + '.png'
#             masked_image.save(filepath)
            ################################################################
            
            ################################################
#             # visualize the segmentation mask
#             from os import path
#             image_rgb = image[..., ::-1]  #utils.read_image(dataset_dict["file_name"], format="RGB")
#             segmask = dataset_dict["semseg"].tensor.numpy()
#             _, fn = path.split(dataset_dict["file_name"])
#             fn_next, ext = path.splitext(fn)
            
#             im = Image.fromarray(np.uint8(image_rgb))
#             filepath = "tmp_segmap_sorted/" + fn_next + '_raw.png'
#             im.save(filepath)
            
#             im2 = Image.fromarray(np.uint8(segmask*3))
#             filepath2 = "tmp_segmap_sorted/" + fn_next + '_seg.png'
#             im2.save(filepath2)
            
            ################################################
            
            ###############
#             # visualize the segmentation map and contours
#             from detectron2.utils.visualizer import Visualizer
#             from detectron2.data.datasets.builtin_meta import COCO_CATEGORIES
#             from os import path
#             #V.draw_sem_seg(self, sem_seg, area_threshold=None, alpha=0.8)            
#             image_rgb = image[..., ::-1]  #utils.read_image(dataset_dict["file_name"], format="RGB")
#             V = Visualizer(image_rgb, dataset_dict)
#             # draw the foreground mask of each object category
#             #binary_masks = dataset_dict["contours"].gt_segmasks.tensor
#             binary_masks = dataset_dict["contours"].gt_contours.tensor
#             _, fn = path.split(dataset_dict["file_name"])
#             fn_next, ext = path.splitext(fn)
#             print('Mask size: ', binary_masks.size())
#             print('Image size: ', image_rgb.shape)
#             assert binary_masks.size(1)==image_rgb.shape[0], (binary_masks.size(1), image_rgb.shape[0])
#             assert binary_masks.size(2)==image_rgb.shape[1], (binary_masks.size(2), image_rgb.shape[1])
#             assert image_rgb.shape[2]==3, image_rgb.shape[2]
#             bm = binary_masks.numpy()
# #             bm_uint8 = bm.astype("uint8")
# #             print(bm)
#             for i in range(binary_masks.size(0)):
#                 masked_image = V.draw_binary_mask(
#                     bm[i, :, :].squeeze(), color=None, edge_color='r', alpha=0.5, area_threshold=10
#                 ) # COCO_CATEGORIES[i]["color"]
# #                 filepath = "tmp/" + fn_next + '_' + COCO_CATEGORIES[i]["name"] + '.png'
# #                 masked_image.save(filepath)
#             filepath = "tmp/" + fn_next + '.png'
#             masked_image.save(filepath)
            
################################################################################################# 

        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f:
                sem_seg_gt = Image.open(f)
                sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
            sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
            sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
            dataset_dict["sem_seg"] = sem_seg_gt
            
        return dataset_dict
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file

        image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
        utils.check_image_size(dataset_dict, image)
        image = transform(image=image)["image"]

        ############################################################################

        '''
        image = utils.read_image(dataset_dict["file_name"], format=self.image_format)

        
        h, w, _ = image.shape
        utils.check_image_size(dataset_dict, image)
        
        bboxes = [ann["bbox"] for ann in dataset_dict['annotations']]
        labels = [ann['category_id'] for ann in dataset_dict['annotations']]
        class_labels = [CLASSES[label] for label in labels]

        segmentations = [ann["segmentation"] for ann in dataset_dict['annotations']]
        #cprint("before :" , segmentations)
        masks = convert_coco_poly_to_mask(segmentations, h, w)
        masks = [mask.numpy() for mask in masks]      
        
        transformed = transform(image=image, bboxes=bboxes, class_labels=class_labels, masks=masks)

        image = transformed["image"]
        bboxes = transformed["bboxes"]

        class_labels = transformed["class_labels"]
        labels = [CLASSES.index(cl) for cl in class_labels]

        
        filtered_masks = []
        
        
        for mask in transformed["masks"]:
            #if len(np.unique(mask)) > 1:
                filtered_masks.append(mask)

        
        if len(bboxes) != len(filtered_masks):
            print(len(bboxes), len(filtered_masks), len(labels))
        
        #print(len(bboxes), len(masks), len(labels))
        seg_masks = [binary_mask_to_polygon(mask, tolerance=2) for mask in masks]
        
        for idx in range(len(labels)):
            dataset_dict['annotations'][idx]["bbox"] = bboxes[idx]
            dataset_dict['annotations'][idx]["labels"] = labels[idx]
            dataset_dict['annotations'][idx]["segmentation"] = seg_masks[idx]
        
        dataset_dict['annotations'] = dataset_dict['annotations'][:len(labels)]
        '''
        
        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
        else:
            sem_seg_gt = None

        aug_input = T.StandardAugInput(image, sem_seg=sem_seg_gt)
        transforms = aug_input.apply_augmentations(self.augmentations)
        image, sem_seg_gt = aug_input.image, aug_input.sem_seg

        image_shape = image.shape[:2]  # h, w
        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
        if sem_seg_gt is not None:
            dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

        # USER: Remove if you don't use pre-computed proposals.
        # Most users would not need this feature.
        if self.proposal_topk is not None:
            utils.transform_proposals(
                dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk
            )

        if not self.is_train:
            # USER: Modify this if you want to keep them for some reason.
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.use_instance_mask:
                    anno.pop("segmentation", None)
                if not self.use_keypoint:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(
                    obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
                )
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(
                annos, image_shape, mask_format=self.instance_mask_format
            )

            # After transforms such as cropping are applied, the bounding box may no longer
            # tightly bound the object. As an example, imagine a triangle object
            # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
            # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
            # the intersection of original bounding box and the cropping box.
            if self.recompute_boxes:
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)
        return dataset_dict
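
The module-level transform applied right after reading the image is not defined in this example; the call signature (transform(image=image)["image"]) suggests an albumentations pipeline. A minimal sketch of what it might contain, purely as an assumption, using image-only augmentations so the geometry handled later by detectron2 stays consistent:

import albumentations as A

transform = A.Compose([
    A.RandomBrightnessContrast(p=0.5),
    A.HueSaturationValue(p=0.3),
    A.GaussNoise(p=0.2),
])
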
Example #24
  def train_mapper(self,dataset_dict):#,dataset_used):
    # Implement a mapper, similar to the default DatasetMapper, but with your own customizations
    # Create a copy of the dataset dict
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below


    ##### Image Transformations #####
    # Read in the image
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    # fileName = dataset_dict["file_name"]

    
    ## Crop to bounding box ##
    # Crop for all but comparison
    if(self.dataset_used != "comparison" and self.is_crop_to_bbox):
      # Get the bounding box
      bbox = ((dataset_dict["annotations"])[0])["bbox"]
      xmin,ymin,xmax,ymax = bbox
      w = xmax-xmin
      h = ymax-ymin

      # IsCropToBBox = True
      # if(IsCropToBBox):
      # Nudge the crop to be slightly outside of the bounding box
      nudgedXMin = xmin-15
      nudgedYMin = ymin-15
      nudgedW = w+50
      nudgedH = h+50

      # If the bounding boxes go outside of the image dimensions, fix this
      imageHeight = image.shape[0]
      imageWidth  = image.shape[1]
      if(nudgedXMin < 0): nudgedXMin = 0
      if(nudgedYMin < 0): nudgedYMin = 0
      if(nudgedXMin+nudgedW >= imageWidth):  nudgedW = imageWidth-1
      if(nudgedYMin+nudgedH >= imageHeight): nudgedH = imageHeight-1

      # Apply the crop
      cropT = T.CropTransform(nudgedXMin,nudgedYMin,nudgedW,nudgedH)
      image = cropT.apply_image(image)
      
      transforms = T.TransformList([cropT])
    # Comparison has bbox the size of the image, so dont bother cropping
    else:
      # scaled between 0.5 and 1; shifted up to 0.5 in each dimension
      # randomExtant = T.RandomExtent( (0.5,1),(0.5,0.5) )
      # transforms = T.TransformList([randomExtant])
      transforms = T.TransformList([])

    # Apply the crop to the bbox as well
    # THIS IS HANDLED IN annotations_to_instances, so long as this is appended to the list of transforms

    dataset_dict["height"] = image.shape[0]
    dataset_dict["width"]  = image.shape[1]
    
    # Add to the list of transforms
    
    # else:
    #   nudgedH = dataset_dict["height"]
    #   nudgedW = dataset_dict["width"]
      

    ## Scale the image size ##
    # thresholdDimension = 1000
    # if(dataset_used == "large"):
      # thresholdDimension = 500
    # thresholdDimension = 800
    # thresholdDimension = 600
    thresholdDimension = self.threshold_dimension

    currWidth  = dataset_dict["width"]
    currHeight = dataset_dict["height"]

    # NOTE: YOLO input size must be multiple of 32
    if(self.modelLink in ["VGG19_BN","YOLOV3"]):
      vgg_im_size = thresholdDimension
      # Apply the scaling transform
      scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=vgg_im_size,new_w=vgg_im_size,interp="nearest") 
      image = scaleT.apply_image(image.copy())

      # Apply the scaling to the bbox
      # THIS IS HANDLED IN annotations_to_instances, so long as this is appended to the list of transforms

      # Add this to the list of transforms
      transforms = transforms + scaleT

      # Set the dimensions
      dataset_dict["height"] = image.shape[0]
      dataset_dict["width"]  = image.shape[1]
    else:  # Downscale only at this threshold
      if(currHeight > thresholdDimension or currWidth > thresholdDimension):
        myNewH = 0
        myNewW = 0
        # Scale the longest dimension to threshold, other in proportion
        if(currHeight > currWidth): 
          myNewH = thresholdDimension
          ratio = currHeight/float(myNewH)
          myNewW = currWidth/float(ratio)
          myNewW = int(round(myNewW))
          # myNewW = 800
        else:
          # myNewH = 800
          
          myNewW = thresholdDimension
          ratio = currWidth/float(myNewW)
          myNewH = currHeight/float(ratio)
          myNewH = int(round(myNewH))
 

        # Apply the scaling transform
        if(self.fixed_wh):
          scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=myNewH,new_w=myNewW,interp="nearest") 
        else:
          scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=myNewW,new_w=myNewH,interp="nearest") 
        image = scaleT.apply_image(image.copy())

        # Apply the scaling to the bbox
        # THIS IS HANDLED IN annotations_to_instances, so long as this is appended to the list of transforms

        # Add this to the list of transforms
        transforms = transforms + scaleT

        # Set the dimensions
        dataset_dict["height"] = image.shape[0]
        dataset_dict["width"]  = image.shape[1]
    
    ## Apply a random flip ##
    image, tfms = T.apply_transform_gens([T.RandomFlip()], image)
    transforms = transforms + tfms

    # Apply Other Transforms ##
    # Standard random image mods
    if(self.dataset_used != "comparison"):
      image, tfms = T.apply_transform_gens([T.RandomBrightness(0.4,1.6),T.RandomContrast(0.4,1.6),T.RandomSaturation(0.5,1.5),T.RandomLighting(1.2)], image)
    # More extreme for comparison set
    else:
      image, tfms = T.apply_transform_gens([T.RandomBrightness(0.2,1.8),T.RandomContrast(0.2,1.8),T.RandomSaturation(0.3,1.7),T.RandomLighting(1.5)], image)
    transforms = transforms + tfms

    ## Apply random affine (actually just a shear) ##
    # Pass in the image size
    PILImage = Image.fromarray(image)

    # Standard affine
    if(self.dataset_used != "comparison"):
      shear_range = 8
      angle_range = 30
      # rand_shear = (np.random.uniform(-shear_range,shear_range),np.random.uniform(-8,8))
      # rand_angle = np.random.uniform(-30,30)
    # More extreme random affine for comparison
    else:
      shear_range = 50
      angle_range = 30
      # rand_shear = (np.random.uniform(-30,30),np.random.uniform(-30,30))
      # rand_angle = np.random.uniform(-70,70)

    rand_shear = (np.random.uniform(-shear_range,shear_range),np.random.uniform(-shear_range,shear_range))
    rand_angle = np.random.uniform(-angle_range,angle_range)

    RandAffT = RandomAffineTransform(PILImage.size,shear=rand_shear,angle=rand_angle)
    # Apply affine to image
    image = RandAffT.apply_image(image.copy())
    # Append to transforms
    transforms = transforms + RandAffT

    ##### END Image Transformations #####

    # Keep these in for now I suppose
    if(image.shape[0] == 0):
      raise ValueError("image shape[0] is 0!: ", image.shape, dataset_dict["file_name"])
    if(image.shape[1] == 0):
      raise ValueError("image shape[1] is 0!: ", image.shape, dataset_dict["file_name"])

    # Set the image in the dictionary
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))


    # Do remainder of dictionary
    classID = ((dataset_dict["annotations"])[0])["category_id"]
    dataset_dict["classID"] = classID

    # bboxes
    # if(self.dataset_used != "comparison"):
    annos = \
    [
      utils.transform_instance_annotations(obj, transforms, image.shape[:2])
      for obj in dataset_dict.pop("annotations")
      if obj.get("iscrowd", 0) == 0
    ]

    # transformNames = [transforms.__name__ for x in transforms]
    # transformNames = ", ".join(transformNames)

    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    # # no bboxes
    # else:
    #   instances = Instances(  (dataset_dict["height"],dataset_dict["width"])  )
    #   instances.gt_classes = torch.tensor([dataset_dict["classID"]])
    #   dataset_dict["instances"] = instances

    dataset_dict["transforms"] = transforms

    return dataset_dict
Example #25
  def test_mapper(self,dataset_dict):#,dataset_used):
    # If we're mapping at test time
    if(self.is_test_time_mapping):
      return self.train_mapper(dataset_dict)



    # Implement a mapper, similar to the default DatasetMapper, but with your own customizations
    # Create a copy of the dataset dict
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below


    ##### Image Transformations #####
    # Read in the image
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    # fileName = dataset_dict["file_name"]

    
    ## Crop to bounding box ##
    if(self.dataset_used != "comparison" and self.is_crop_to_bbox):
      # Get the bounding box
      bbox = ((dataset_dict["annotations"])[0])["bbox"]
      xmin,ymin,xmax,ymax = bbox
      w = xmax-xmin
      h = ymax-ymin

      # IsCropToBBox = True
      # if(IsCropToBBox):
      # Nudge the crop to be slightly outside of the bounding box
      nudgedXMin = xmin-15
      nudgedYMin = ymin-15
      nudgedW = w+50
      nudgedH = h+50

      # If the bounding boxes go outside of the image dimensions, fix this
      imageHeight = image.shape[0]
      imageWidth  = image.shape[1]
      if(nudgedXMin < 0): nudgedXMin = 0
      if(nudgedYMin < 0): nudgedYMin = 0
      # Clamp the crop so it does not extend past the image border; width/height are
      # measured from the nudged origin (see the standalone helper sketched after this method)
      if(nudgedXMin+nudgedW >= imageWidth):  nudgedW = imageWidth-1-nudgedXMin
      if(nudgedYMin+nudgedH >= imageHeight): nudgedH = imageHeight-1-nudgedYMin

      # Apply the crop
      cropT = T.CropTransform(nudgedXMin,nudgedYMin,nudgedW,nudgedH)
      image = cropT.apply_image(image)
      
      transforms = T.TransformList([cropT])
    # else:
      # nudgedH = dataset_dict["height"]
      # nudgedW = dataset_dict["width"]
    else:
      transforms = T.TransformList([])

      
    # The crop is applied to the bboxes later, by transform_instance_annotations,
    # as long as cropT is included in the transforms list

    dataset_dict["height"] = image.shape[0]
    dataset_dict["width"]  = image.shape[1]
    
    # Add to the list of transforms
    
      

    ## Scale the image size ##
    # thresholdDimension = 1000
    # if(dataset_used == "large"):
      # thresholdDimension = 500
    # thresholdDimension = 800
    # thresholdDimension = 600
    thresholdDimension = self.threshold_dimension

    currWidth  = dataset_dict["width"]
    currHeight = dataset_dict["height"]

    # The VGG and YOLO pipelines used here expect square inputs of the same fixed size
    if(self.modelLink in ["VGG19_BN","YOLOV3"]):
      vgg_im_size = thresholdDimension
      # Apply the scaling transform
      scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=vgg_im_size,new_w=vgg_im_size,interp="nearest") 
      image = scaleT.apply_image(image.copy())

      # The scaling is applied to the bboxes later, by transform_instance_annotations,
      # as long as scaleT is included in the transforms list

      # Add this to the list of transforms
      transforms = transforms + scaleT

      # Set the dimensions
      dataset_dict["height"] = image.shape[0]
      dataset_dict["width"]  = image.shape[1]
    # Not VGG or YOLO: keep the aspect ratio and downscale only above the threshold
    # (a standalone sketch of this resize appears after this example)
    else:
      if(currHeight > thresholdDimension or currWidth > thresholdDimension):
        myNewH = 0
        myNewW = 0
        # Scale the longest dimension to thresholdDimension and the shorter one proportionally
        if(currHeight > currWidth): 
          myNewH = thresholdDimension
          ratio = currHeight/float(myNewH)
          myNewW = currWidth/float(ratio)
          myNewW = int(round(myNewW))
          # myNewW = 800
        else:
          # myNewH = 800
          myNewW = thresholdDimension
          ratio = currWidth/float(myNewW)
          myNewH = currHeight/float(ratio)
          myNewH = int(round(myNewH))

        # Apply the scaling transform
        # scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=myNewH,new_w=myNewW,interp="nearest") 
        # scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=myNewW,new_w=myNewH,interp="nearest") 
        # NOTE: when fixed_wh is False, new_h and new_w are passed swapped
        # (matching the alternative ScaleTransform call commented out above)
        if(self.fixed_wh):
          scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=myNewH,new_w=myNewW,interp="nearest")
        else:
          scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=myNewW,new_w=myNewH,interp="nearest")
        image = scaleT.apply_image(image.copy())

        # The scaling is applied to the bboxes later, by transform_instance_annotations,
        # as long as scaleT is included in the transforms list

        # Add this to the list of transforms
        transforms = transforms + scaleT

        # Set the dimensions
        dataset_dict["height"] = image.shape[0]
        dataset_dict["width"]  = image.shape[1]
    
    ## Apply a random flip ##
    # image, tfms = T.apply_transform_gens([T.RandomFlip()], image)
    # transforms = transforms + tfms

    # Apply Other Transforms ##
    # image, tfms = T.apply_transform_gens([T.RandomBrightness(0.4,1.6),T.RandomContrast(0.4,1.6),T.RandomSaturation(0.5,1.5),T.RandomLighting(1.2)], image)
    # transforms = transforms + tfms

    ## Apply random affine (actually just a shear) ##
    # Pass in the image size
    # PILImage = Image.fromarray(image)
    # RandAffT = RandomAffineTransform(PILImage.size)
    # Apply affine to image
    # image = RandAffT.apply_image(image.copy())
    # Append to transforms
    # transforms = transforms + RandAffT

    ##### END Image Transformations #####

    # Sanity-check that the transformed image is non-empty
    if(image.shape[0] == 0):
      raise ValueError("image height is 0: {} ({})".format(image.shape, dataset_dict["file_name"]))
    if(image.shape[1] == 0):
      raise ValueError("image width is 0: {} ({})".format(image.shape, dataset_dict["file_name"]))

    # Set the image in the dictionary
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))


    # Do remainder of dictionary
    classID = dataset_dict["annotations"][0]["category_id"]
    dataset_dict["classID"] = classID

    annos = \
    [
      utils.transform_instance_annotations(obj, transforms, image.shape[:2])
      for obj in dataset_dict.pop("annotations")
      if obj.get("iscrowd", 0) == 0
    ]

    # transformNames = [transforms.__name__ for x in transforms]
    # transformNames = ", ".join(transformNames)

    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)

    dataset_dict["transforms"] = transforms

    return dataset_dict
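
The crop-to-bbox step above nudges the box outward and clamps it to the image; factored into a standalone helper, the same logic might look like the sketch below. This is illustrative only; the helper name and the padding defaults are assumptions, not part of the original class.

# Illustrative sketch of the bbox-nudge-and-clamp crop used by the mappers above.
import detectron2.data.transforms as T


def crop_around_bbox(image, bbox_xyxy, pad_before=15, pad_total=50):
    """Crop `image` around an XYXY bbox with some padding, clamped to the image bounds."""
    xmin, ymin, xmax, ymax = bbox_xyxy
    x0 = max(int(xmin) - pad_before, 0)
    y0 = max(int(ymin) - pad_before, 0)
    w = min(int(xmax - xmin) + pad_total, image.shape[1] - x0)  # clamp to image width
    h = min(int(ymax - ymin) + pad_total, image.shape[0] - y0)  # clamp to image height
    crop_tfm = T.CropTransform(x0, y0, w, h)
    return crop_tfm.apply_image(image), crop_tfm
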

  # # Small mappers
  # def small_train_mapper(self,dataset_dict):
  #   return self.train_mapper(dataset_dict,"small")

  # def small_test_mapper(self,dataset_dict):
  #   if(self.is_test_time_mapping):
  #     return self.test_mapper(dataset_dict,"small")
  #   else:
  #     return self.train_mapper(dataset_dict,"small")

  # # Large mappers
  # def large_train_mapper(self,dataset_dict):
  #   return self.train_mapper(dataset_dict,"large")

  # def large_test_mapper(self,dataset_dict):
  #   if(self.is_test_time_mapping):
  #     return self.test_mapper(dataset_dict,"large")
  #   else:
  #     return self.train_mapper(dataset_dict,"large")

  # # Full mappers
  # def full_train_mapper(self,dataset_dict):
  #   return self.train_mapper(dataset_dict,"full")

  # def full_test_mapper(self,dataset_dict):
  #   if(self.is_test_time_mapping):
  #     return self.test_mapper(dataset_dict,"full")
  #   else:
  #     return self.train_mapper(dataset_dict,"full")

  # # Comparison mappers
  # def comparison_train_mapper(self,dataset_dict):
  #   return self.train_mapper(dataset_dict,"comparison")

  # def comparison_test_mapper(self,dataset_dict):
  #   if(self.is_test_time_mapping):
  #     return self.test_mapper(dataset_dict,"comparison")
  #   else:
  #     return self.train_mapper(dataset_dict,"comparison")
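
The aspect-preserving downscale in test_mapper above (longest side clamped to threshold_dimension, shorter side scaled proportionally) can also be expressed as a small helper. The sketch below is illustrative; the function name and defaults are assumptions.

# Illustrative sketch of the aspect-preserving downscale used above:
# clamp the longest side to `threshold` and scale the other side proportionally.
import detectron2.data.transforms as T


def downscale_longest_side(image, threshold=1000, interp="nearest"):
    h, w = image.shape[:2]
    if max(h, w) <= threshold:
        return image, T.TransformList([])  # nothing to do
    scale = threshold / float(max(h, w))
    new_h, new_w = int(round(h * scale)), int(round(w * scale))
    scale_tfm = T.ScaleTransform(h=h, w=w, new_h=new_h, new_w=new_w, interp=interp)
    return scale_tfm.apply_image(image.copy()), scale_tfm
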
Ejemplo n.º 26
0
    def __call__(self, dataset_dict):
        """
                Args:
                    dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

                Returns:
                    dict: a format that builtin models in detectron2 accept
                """
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        image = utils.read_image(dataset_dict["file_name"],
                                 format=self.image_format)
        utils.check_image_size(dataset_dict, image)

        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            sem_seg_gt = utils.read_image(
                dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
        else:
            sem_seg_gt = None

        aug_input = T.StandardAugInput(image, sem_seg=sem_seg_gt)
        transforms = aug_input.apply_augmentations(self.augmentations)
        image, sem_seg_gt = aug_input.image, aug_input.sem_seg

        image_shape = image.shape[:2]  # h, w
        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)))
        if sem_seg_gt is not None:
            dataset_dict["sem_seg"] = torch.as_tensor(
                sem_seg_gt.astype("long"))

        # USER: Remove if you don't use pre-computed proposals.
        # Most users would not need this feature.
        if self.proposal_topk is not None:
            utils.transform_proposals(dataset_dict,
                                      image_shape,
                                      transforms,
                                      proposal_topk=self.proposal_topk)

        if not self.is_train:
            # USER: Modify this if you want to keep them for some reason.
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.use_instance_mask:
                    anno.pop("segmentation", None)
                if not self.use_keypoint:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices)
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(
                annos, image_shape, mask_format=self.instance_mask_format)
            # @ Will Lee: fine-grained classification categories: non-standard, roughly standard, or standard
            standard_ids = [obj["standard_id"] for obj in annos]
            standard_ids = torch.tensor(standard_ids, dtype=torch.int64)
            instances.gt_standards = standard_ids

            # After transforms such as cropping are applied, the bounding box may no longer
            # tightly bound the object. As an example, imagine a triangle object
            # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
            # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
            # the intersection of original bounding box and the cropping box.
            if self.recompute_boxes:
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)
        return dataset_dict
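
The gt_standards field above relies on Instances accepting arbitrary per-instance fields, as long as every field has the same length. A minimal, illustrative sketch (the values are made up):

# Illustrative: Instances accepts arbitrary per-instance fields of equal length.
import torch
from detectron2.structures import Boxes, Instances

instances = Instances((480, 640))  # image size (h, w)
instances.gt_boxes = Boxes(torch.tensor([[10.0, 10.0, 100.0, 100.0]]))  # one box
instances.gt_classes = torch.tensor([2], dtype=torch.int64)
instances.gt_standards = torch.tensor([1], dtype=torch.int64)  # custom field, length 1
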
Ejemplo n.º 27
0
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        image = utils.read_image(dataset_dict["file_name"],
                                 format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        ### my code ###
        ## Temporarily stash the segmentation annotations and restore them after augmentation
        seg_bk = [
            dictwk["segmentation"] for dictwk in dataset_dict["annotations"]
        ]
        for i in range(len(dataset_dict["annotations"])):
            dataset_dict["annotations"][i].pop("segmentation")
        image, dataset_dict = self.aug_handler(
            image=image, dataset_dict_detectron=dataset_dict)
        for i in range(len(dataset_dict["annotations"])):
            dataset_dict["annotations"][i]["segmentation"] = seg_bk[i]
        ### my code ###

        if "annotations" not in dataset_dict:
            image, transforms = T.apply_transform_gens(
                ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens,
                image)
        else:
            # Crop around an instance if there are instances in the image.
            # USER: Remove if you don't use cropping
            if self.crop_gen:
                crop_tfm = utils.gen_crop_transform_with_instance(
                    self.crop_gen.get_crop_size(image.shape[:2]),
                    image.shape[:2],
                    np.random.choice(dataset_dict["annotations"]),
                )
                image = crop_tfm.apply_image(image)
            image, transforms = T.apply_transform_gens(self.tfm_gens, image)
            if self.crop_gen:
                transforms = crop_tfm + transforms

        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)))

        # USER: Remove if you don't use pre-computed proposals.
        if self.load_proposals:
            utils.transform_proposals(dataset_dict, image_shape, transforms,
                                      self.min_box_side_len,
                                      self.proposal_topk)

        if not self.is_train:
            # USER: Modify this if you want to keep them for some reason.
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.mask_on:
                    anno.pop("segmentation", None)
                if not self.keypoint_on:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices)
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(
                annos, image_shape, mask_format=self.mask_format)
            # Create a tight bounding box from masks, useful when image is cropped
            if self.crop_gen and instances.has("gt_masks"):
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)

        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            with PathManager.open(dataset_dict.pop("sem_seg_file_name"),
                                  "rb") as f:
                sem_seg_gt = Image.open(f)
                sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
            sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
            sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
            dataset_dict["sem_seg"] = sem_seg_gt
        return dataset_dict
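
The instance-aware crop above (gen_crop_transform_with_instance) can be isolated into a small helper. The sketch below is illustrative; the helper name and the default RandomCrop settings are assumptions.

# Illustrative sketch of instance-aware cropping: pick a random annotation and
# generate a crop window that keeps that instance inside it.
import numpy as np
import detectron2.data.transforms as T
from detectron2.data import detection_utils as utils


def crop_around_instance(image, annotations, crop_gen=None):
    crop_gen = crop_gen or T.RandomCrop("relative_range", (0.9, 0.9))
    crop_tfm = utils.gen_crop_transform_with_instance(
        crop_gen.get_crop_size(image.shape[:2]),  # (crop_h, crop_w)
        image.shape[:2],                          # (img_h, img_w)
        np.random.choice(annotations),            # annotation dict kept inside the crop
    )
    return crop_tfm.apply_image(image), crop_tfm
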
Ejemplo n.º 28
0
    def __call__(self, dataset_dict):
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        image = utils.read_image(dataset_dict["file_name"], format="BGR")
        utils.check_image_size(dataset_dict, image)
        data_transformations = []

        if self.is_train:
            # Crop
            if self.crop:
                crop_gen = T.RandomCrop(self.crop_type, self.crop_size)
                data_transformations.append(crop_gen)
                print('crop')
            # Horizontal flip
            if self.flip:
                flip_gen = T.RandomFlip()
                data_transformations.append(flip_gen)
            # if self.rotation:
            #     rotation_gen = T.RandomRotation([0, 90])
            #     data_transformations.append(rotation_gen)
            if self.saturation:
                saturation_gen = T.RandomSaturation(0.5, 1.5)
                data_transformations.append(saturation_gen)
                print(str(dataset_dict["file_name"]))

        image, transforms = T.apply_transform_gens(data_transformations, image)
        print('\n\n -------------------PRINTING IMAGE---------------------- \n\n')
        file_name = dataset_dict["file_name"]
        img_name = file_name[len(file_name) - 15:len(file_name) - 4]
        # (a more robust way to build this path is sketched after this example)
        img_name = '/home/grupo01/images_augmented/' + img_name + '_augmented.png'
        print(len(file_name))
        print(img_name)

        cv2.imwrite(img_name, image)

        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)))

        image_shape = image.shape[:2]

        if not self.is_train:
            # USER: Modify this if you want to keep them for some reason.
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            for anno in dataset_dict["annotations"]:
                if not self.mask_on:
                    anno.pop("segmentation", None)
                if not self.keypoint_on:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(
                    obj, transforms, image_shape, keypoint_hflip_indices=None)
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(
                annos, image_shape, mask_format=self.mask_format)
            # Create a tight bounding box from masks, useful when image is cropped
            if self.crop_gen and instances.has("gt_masks"):
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)

        return dataset_dict
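
The fixed-index slicing used above to build img_name assumes a particular file-name length. A hypothetical, more robust helper (the name and output directory are assumptions, not part of the original) might look like:

# Hypothetical helper (not from the original): derive the debug output path from
# the input file name instead of slicing by fixed indices.
import os


def augmented_image_path(file_name, out_dir="/home/grupo01/images_augmented"):
    stem = os.path.splitext(os.path.basename(file_name))[0]  # file name without extension
    return os.path.join(out_dir, stem + "_augmented.png")
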
Ejemplo n.º 29
0
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        try:
            image = utils.read_image(dataset_dict["file_name"],
                                     format=self.img_format)
        except Exception as e:
            print(dataset_dict["file_name"])
            print(e)
            raise e
        try:
            utils.check_image_size(dataset_dict, image)
        except SizeMismatchError as e:
            expected_wh = (dataset_dict["width"], dataset_dict["height"])
            image_wh = (image.shape[1], image.shape[0])
            if (image_wh[1], image_wh[0]) == expected_wh:
                print("transposing image {}".format(dataset_dict["file_name"]))
                image = image.transpose(1, 0, 2)
            else:
                raise e

        if "annotations" not in dataset_dict or len(
                dataset_dict["annotations"]) == 0:
            image, transforms = T.apply_transform_gens(
                ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens,
                image)
        else:
            # Crop around an instance if there are instances in the image.
            # USER: Remove if you don't use cropping
            if self.crop_gen:
                crop_tfm = utils.gen_crop_transform_with_instance(
                    self.crop_gen.get_crop_size(image.shape[:2]),
                    image.shape[:2],
                    np.random.choice(dataset_dict["annotations"]),
                )
                image = crop_tfm.apply_image(image)
            image, transforms = T.apply_transform_gens(self.tfm_gens, image)
            if self.crop_gen:
                transforms = crop_tfm + transforms

        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(
            image.transpose(2, 0, 1).astype("float32"))
        # Can use uint8 if it turns out to be slow some day

        # USER: Remove if you don't use pre-computed proposals.
        if self.load_proposals:
            utils.transform_proposals(dataset_dict, image_shape, transforms,
                                      self.min_box_side_len,
                                      self.proposal_topk)

        if not self.is_train:
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            dataset_dict.pop("pano_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.mask_on:
                    anno.pop("segmentation", None)
                if not self.keypoint_on:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices)
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(
                annos, image_shape, mask_format=self.mask_format)
            # Create a tight bounding box from masks, useful when image is cropped
            if self.crop_gen and instances.has("gt_masks"):
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)

        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            with PathManager.open(dataset_dict.pop("sem_seg_file_name"),
                                  "rb") as f:
                sem_seg_gt = Image.open(f)
                sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
            sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
            sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
            dataset_dict["sem_seg"] = sem_seg_gt

        if self.basis_loss_on and self.is_train:
            # load basis supervisions
            if self.ann_set == "coco":
                basis_sem_path = dataset_dict["file_name"].replace(
                    'train2017',
                    'thing_train2017').replace('image/train', 'thing_train')
            else:
                basis_sem_path = dataset_dict["file_name"].replace(
                    'coco',
                    'lvis').replace('train2017',
                                    'thing_train').replace('jpg', 'npz')
            basis_sem_path = basis_sem_path.replace('jpg', 'npz')
            basis_sem_gt = np.load(basis_sem_path)["mask"]
            basis_sem_gt = transforms.apply_segmentation(basis_sem_gt)
            basis_sem_gt = torch.as_tensor(basis_sem_gt.astype("long"))
            dataset_dict["basis_sem"] = basis_sem_gt
        return dataset_dict
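
The width/height mismatch guard at the top of this example (common with images whose stored orientation differs from the annotated one) can be factored out. A minimal sketch, with the helper name being an assumption:

# Minimal sketch of the width/height mismatch guard used above: if the image is
# stored transposed relative to the annotated width/height, transpose it back.
from detectron2.data.detection_utils import SizeMismatchError, check_image_size


def checked_image(dataset_dict, image):
    try:
        check_image_size(dataset_dict, image)
    except SizeMismatchError:
        expected_wh = (dataset_dict["width"], dataset_dict["height"])
        if (image.shape[0], image.shape[1]) == expected_wh:  # H and W are swapped
            image = image.transpose(1, 0, 2)
        else:
            raise
    return image
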
Ejemplo n.º 30
0
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        if "annotations" not in dataset_dict:
            image, transforms = T.apply_transform_gens(
                ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image
            )
        else:
            # Crop around an instance if there are instances in the image.
            # USER: Remove if you don't use cropping
            if self.crop_gen:
                crop_tfm = utils.gen_crop_transform_with_instance(
                    self.crop_gen.get_crop_size(image.shape[:2]),
                    image.shape[:2],
                    np.random.choice(dataset_dict["annotations"]),
                )
                image = crop_tfm.apply_image(image)
            image, transforms = T.apply_transform_gens(self.tfm_gens, image)
            if self.crop_gen:
                transforms = crop_tfm + transforms

        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

        # USER: Remove if you don't use pre-computed proposals.
        # Most users would not need this feature.
        if self.load_proposals:
            utils.transform_proposals(
                dataset_dict,
                image_shape,
                transforms,
                proposal_topk=self.proposal_topk,
                min_box_size=self.proposal_min_box_size,
            )

        # HACK Keep annotations for test
        # if not self.is_train:
        #     # USER: Modify this if you want to keep them for some reason.
        #     dataset_dict.pop("annotations", None)
        #     dataset_dict.pop("sem_seg_file_name", None)
        #     return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.mask_on:
                    anno.pop("segmentation", None)
                if not self.keypoint_on:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(
                    obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
                )
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(
                annos, image_shape, mask_format=self.mask_format
            )
            # Create a tight bounding box from masks, useful when image is cropped
            if self.crop_gen and instances.has("gt_masks"):
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)

        return dataset_dict