Example #1
    def test_augmentation_input_args(self):
        input_shape = (100, 100)
        output_shape = (50, 50)

        # define two augmentations with different args
        class TG1(T.Augmentation):
            def get_transform(self, image, sem_seg):
                return T.ResizeTransform(input_shape[0], input_shape[1],
                                         output_shape[0], output_shape[1])

        class TG2(T.Augmentation):
            def get_transform(self, image):
                assert image.shape[:2] == output_shape  # check that TG1 is applied
                return T.HFlipTransform(output_shape[1])

        image = np.random.rand(*input_shape).astype("float32")
        sem_seg = (np.random.rand(*input_shape) < 0.5).astype("uint8")
        inputs = T.AugInput(image, sem_seg=sem_seg)  # provide two args
        tfms = inputs.apply_augmentations([TG1(), TG2()])
        self.assertIsInstance(tfms[0], T.ResizeTransform)
        self.assertIsInstance(tfms[1], T.HFlipTransform)
        self.assertTrue(inputs.image.shape[:2] == output_shape)
        self.assertTrue(inputs.sem_seg.shape[:2] == output_shape)

        class TG3(T.Augmentation):
            def get_transform(self, image, nonexist):
                pass

        with self.assertRaises(AttributeError):
            inputs.apply_augmentations([TG3()])
Example #2
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        # Load image.
        image = utils.read_image(dataset_dict["file_name"],
                                 format=self.image_format)
        utils.check_image_size(dataset_dict, image)
        # Panoptic label is encoded in RGB image.
        pan_seg_gt = utils.read_image(dataset_dict.pop("pan_seg_file_name"),
                                      "RGB")

        # Reuses semantic transform for panoptic labels.
        aug_input = T.AugInput(image, sem_seg=pan_seg_gt)
        _ = self.augmentations(aug_input)
        image, pan_seg_gt = aug_input.image, aug_input.sem_seg

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)))

        # Generates training targets for Panoptic-DeepLab.
        targets = self.panoptic_target_generator(rgb2id(pan_seg_gt),
                                                 dataset_dict["segments_info"])
        dataset_dict.update(targets)

        return dataset_dict
Example #3
    def __call__(self, dataset_dict):
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        image = utils.read_image(dataset_dict["file_name"], format="BGR")

        aug_input = T.AugInput(image)
        transforms = self.augmentations(aug_input)
        image = aug_input.image

        # if not self.is_train:
        #     # USER: Modify this if you want to keep them for some reason.
        #     dataset_dict.pop("annotations", None)
        #     dataset_dict.pop("sem_seg_file_name", None)
        #     return dataset_dict

        image_shape = image.shape[:2]  # h, w
        dataset_dict["image"] = torch.as_tensor(
            image.transpose(2, 0, 1).astype("float32"))
        annos = [
            utils.transform_instance_annotations(obj, transforms, image_shape)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(annos, image_shape)
        dataset_dict["instances"] = utils.filter_empty_instances(instances)
        return dataset_dict
Example #4
    def test_augmentation_list(self):
        input_shape = (100, 100)
        image = np.random.rand(*input_shape).astype("float32")
        sem_seg = (np.random.rand(*input_shape) < 0.5).astype("uint8")
        inputs = T.AugInput(image, sem_seg=sem_seg)  # provide two args

        augs = T.AugmentationList([T.RandomFlip(), T.Resize(20)])
        _ = T.AugmentationList([augs, T.Resize(30)])(inputs)
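The object returned by `apply_augmentations` (or by calling an `AugmentationList`) is a `TransformList`, so the same geometric operations can be replayed on boxes, coordinates, or segmentation masks; this is what the dataset-mapper examples below do through `transform_instance_annotations`. A minimal standalone sketch, not taken from any of the repositories above:

import numpy as np
from detectron2.data import transforms as T

image = np.random.rand(100, 100, 3).astype("float32")
aug_input = T.AugInput(image)
tfms = T.AugmentationList([T.Resize((50, 50)), T.RandomFlip(prob=1.0)])(aug_input)

boxes = np.array([[10.0, 20.0, 40.0, 80.0]])  # XYXY, in the original image
print(tfms.apply_box(boxes))   # the same resize + flip, replayed on the box
print(aug_input.image.shape)   # (50, 50, 3)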
Example #5
        def apply_image_augmentations(image, dataset_dict, sem_seg_gt,
                                      augmentations):
            """Applies given augmentation to the given image and its attributes (segm, instances, etc).

            Almost no changes from D2's original code (apart from erasing non-relevant portions, e.g. for
            keypoints), just wrapped it in a function to avoid duplicate code."""

            aug_input = T.AugInput(image, sem_seg=sem_seg_gt)
            transforms = augmentations(aug_input)
            image, sem_seg_gt = aug_input.image, aug_input.sem_seg

            image_shape = image.shape[:2]  # h, w
            # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
            # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
            # Therefore it's important to use torch.Tensor.
            dataset_dict["image"] = torch.as_tensor(
                np.ascontiguousarray(image.transpose(2, 0, 1)))
            if sem_seg_gt is not None:
                dataset_dict["sem_seg"] = torch.as_tensor(
                    sem_seg_gt.astype("long"))

            if not self.is_train:
                dataset_dict.pop("annotations", None)
                dataset_dict.pop("sem_seg_file_name", None)
                return dataset_dict

            if "annotations" in dataset_dict:
                for anno in dataset_dict["annotations"]:
                    if not self.use_instance_mask:
                        anno.pop("segmentation", None)
                    if not self.use_keypoint:
                        anno.pop("keypoints", None)

                annos = [
                    utils.transform_instance_annotations(
                        obj,
                        transforms,
                        image_shape,
                        keypoint_hflip_indices=self.keypoint_hflip_indices,
                    ) for obj in dataset_dict.pop("annotations")
                    if obj.get("iscrowd", 0) == 0
                ]
                instances = utils.annotations_to_instances(
                    annos, image_shape, mask_format=self.instance_mask_format)

                # After transforms such as cropping are applied, the bounding box may no longer
                # tightly bound the object. As an example, imagine a triangle object
                # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
                # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
                # the intersection of original bounding box and the cropping box.
                if self.recompute_boxes:
                    instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
                dataset_dict["instances"] = utils.filter_empty_instances(
                    instances)

            return dataset_dict, transforms
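The `recompute_boxes` branch above derives tight boxes from the already-transformed instance masks via `get_bounding_boxes()`. A tiny self-contained sketch of that call on made-up `BitMasks` data (the numbers are illustrative only):

import torch
from detectron2.structures import BitMasks

# one 4x4 mask containing a 2x2 blob whose top-left pixel is at (x=1, y=0)
masks = torch.zeros(1, 4, 4, dtype=torch.bool)
masks[0, 0:2, 1:3] = True
print(BitMasks(masks).get_bounding_boxes().tensor)  # tensor([[1., 0., 3., 2.]]), XYXY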
Example #6
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below

        # image1 = utils.convert_PIL_to_numpy(
        #     Image.open(dataset_dict["image_file1"]), format=self.image_format)
        # image2 = utils.convert_PIL_to_numpy(
        #     Image.open(dataset_dict["image_file2"]), format=self.image_format)
        image1 = utils.read_image(dataset_dict["image_file1"],
                                  format=self.image_format)
        image2 = utils.read_image(dataset_dict["image_file2"],
                                  format=self.image_format)
        flow_map = flow_utils.read_flow(dataset_dict["flow_map_file"])
        _check_shape(image1, image2, flow_map)

        height, width = image1.shape[:2]  # h, w
        dataset_dict["height"] = height
        dataset_dict["width"] = width

        # Apply augmentations
        aug_input = T.AugInput(image=image1)
        transforms = self.augmentations(aug_input)
        image1 = aug_input.image
        image2 = transforms.apply_image2(image2)
        flow_map = transforms.apply_flow(flow_map)
        _check_shape(image1, image2, flow_map)

        # Visualize
        # from detectron2.utils.flow_visualizer import (
        #     visualize_sample_from_array,
        #     visualize_sample_from_file
        # )
        # visualize_sample_from_array(image1, image2, flow_map, save=True)
        # visualize_sample_from_file(
        #     dataset_dict["image_file1"],
        #     dataset_dict["image_file2"],
        #     dataset_dict["flow_map_file"],
        #     save=True
        # )

        dataset_dict["image1"] = torch.as_tensor(
            np.ascontiguousarray(image1.transpose(2, 0, 1)))
        dataset_dict["image2"] = torch.as_tensor(
            np.ascontiguousarray(image2.transpose(2, 0, 1)))
        dataset_dict["flow_map"] = torch.as_tensor(
            np.ascontiguousarray(flow_map.transpose(2, 0, 1)))

        return dataset_dict
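`apply_image2` and `apply_flow` are not part of the stock `Transform` interface, so this repository presumably registers them as custom operations. Detectron2 transforms support this via `Transform.register_type`; a hedged sketch of how an `apply_flow` for horizontal flips could be registered (negating the x-component is an assumption about how the flow field is stored):

import numpy as np
from detectron2.data import transforms as T

def hflip_flow(transform: T.HFlipTransform, flow: np.ndarray) -> np.ndarray:
    # flow has shape (H, W, 2): flip it left-right and negate the x-component
    flow = flow[:, ::-1, :].copy()
    flow[:, :, 0] *= -1
    return flow

T.HFlipTransform.register_type("flow", hflip_flow)

flow = np.random.randn(32, 64, 2).astype("float32")
flipped = T.HFlipTransform(width=64).apply_flow(flow)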
Example #7
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
        utils.check_image_size(dataset_dict, image)

        aug_input = T.AugInput(image)
        transforms = self.augmentations(aug_input)
        image = aug_input.image

        image_shape = image.shape[:2]  # h, w
        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

        if not self.is_train:
            dataset_dict.pop("annotations", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # Maps points from the closed interval [0, image_size - 1] on discrete
            # image coordinates to the half-open interval [x1, x2) on continuous image
            # coordinates. We use the continuous-discrete conversion from Heckbert
            # 1990 ("What is the coordinate of a pixel?"): d = floor(c) and c = d + 0.5,
            # where d is a discrete coordinate and c is a continuous coordinate.
            for ann in dataset_dict["annotations"]:
                point_coords_wrt_image = np.array(ann["point_coords"]).astype(np.float64)
                point_coords_wrt_image = point_coords_wrt_image + 0.5
                ann["point_coords"] = point_coords_wrt_image

            annos = [
                # also need to transform point coordinates
                transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                )
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = annotations_to_instances(
                annos,
                image_shape,
                sample_points=self.sample_points,
            )

            dataset_dict["instances"] = utils.filter_empty_instances(instances)
        return dataset_dict
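The half-pixel shift converts discrete pixel indices into continuous coordinates, which is the convention `Transform.apply_coords` operates on. A small sketch, independent of the repository's own `transform_instance_annotations`, showing such point coordinates under a horizontal flip:

import numpy as np
from detectron2.data import transforms as T

pts = np.array([[10, 20], [99, 0]], dtype=np.float64) + 0.5  # d -> c = d + 0.5
print(T.HFlipTransform(width=100).apply_coords(pts))         # x -> width - x: [[89.5, 20.5], [0.5, 0.5]]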
Example #8
def __call__(self, dataset_dict):
    """
    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(
        dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"],
                             format=self.image_format)
    utils.check_image_size(dataset_dict, image)

    aug_input = T.AugInput(image)
    transforms = self.augmentations(aug_input)
    image = aug_input.image

    image_shape = image.shape[:2]  # h, w
    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(
        np.ascontiguousarray(image.transpose(2, 0, 1)))

    if "annotations" in dataset_dict:
        # USER: Implement additional transformations if you have other types of data
        annos = [
            utils.transform_instance_annotations(
                obj,
                transforms,
                image_shape,
                keypoint_hflip_indices=self.keypoint_hflip_indices)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.instance_mask_format)

        # After transforms such as cropping are applied, the bounding box may no longer
        # tightly bound the object. As an example, imagine a triangle object
        # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
        # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
        # the intersection of original bounding box and the cropping box.

        dataset_dict["instances"] = utils.filter_empty_instances(instances)
    return dataset_dict
Example #9
    def _load_image_with_annos(self, dataset_dict):
        """
        Load the image and annotations given a dataset_dict.
        """
        # USER: Write your own image loading if it's not from a file
        image = utils.read_image(dataset_dict["file_name"],
                                 format=self.image_format)
        utils.check_image_size(dataset_dict, image)

        aug_input = T.AugInput(image)
        transforms = self.augmentations(aug_input)
        image = aug_input.image

        image_shape = image.shape[:2]  # h, w

        if not self.is_train:
            # USER: Modify this if you want to keep them for some reason.
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return image, None

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.use_instance_mask:
                    anno.pop("segmentation", None)
                if not self.use_keypoint:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other
            # types of data
            # apply meta_infos for mosaic transformation
            annos = [
                transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    add_meta_infos=self.add_meta_infos)
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
        else:
            annos = None
        return image, annos
Example #10
    def __call__(self, dataset_dict):
        dataset_dict = copy.deepcopy(dataset_dict)
        image = utils.read_image(dataset_dict["file_name"], format="BGR")
        aug_input = T.AugInput(image)
        transforms = self.augmentations(aug_input)
        image = aug_input.image

        image_shape = image.shape[:2]  #h, w
        dataset_dict["image"] = torch.as_tensor(
            image.transpose(2, 0, 1).astype("float32"))
        annos = [
            utils.transform_instance_annotations(obj, transforms, image_shape)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(annos, image_shape)
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

        return dataset_dict
Example #11
    def __call__(self, dataset_dict):
        dataset_dict = copy.deepcopy(dataset_dict)
        # it will be modified by code below
        # can use other ways to read image
        image = utils.read_image(dataset_dict["file_name"], format="BGR")
        # See "Data Augmentation" tutorial for detailed usage
        auginput = T.AugInput(image)
        transform = T.Resize((800, 800))(auginput)
        print(f'resized image {dataset_dict["file_name"]}')
        image = torch.from_numpy(auginput.image.transpose(2, 0, 1))
        annos = [
            utils.transform_instance_annotations(annotation, [transform],
                                                 image.shape[1:])
            for annotation in dataset_dict.pop("annotations")
        ]
        return {
            # create the format that the model expects
            "image": image,
            "instances": utils.annotations_to_instances(annos, image.shape[1:]),
        }
Example #12
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
        utils.check_image_size(dataset_dict, image)
        # USER: Remove if you don't do semantic/panoptic segmentation.

        if "sem_seg_file_name" in dataset_dict:
            if "category_colors" in dataset_dict:
                sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "RGB")
                sem_seg_gt = rgb2mask(sem_seg_gt, dataset_dict["category_colors"])
            else :
                sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L")
                sem_seg_gt = sem_seg_gt.squeeze(2)

        else :
            sem_seg_gt=None

        aug_input = T.AugInput(image, sem_seg=sem_seg_gt)
        transforms = self.augmentations(aug_input)
        image, sem_seg_gt = aug_input.image, aug_input.sem_seg
        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
        if sem_seg_gt is not None:
            dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt,dtype=torch.long)

        if not self.is_train:
            # USER: Modify this if you want to keep them for some reason.
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict
        return dataset_dict
Example #13
    def __call__(self, dataset_dict):

        dataset_dict = copy.deepcopy(dataset_dict)
        image = utils.read_image(dataset_dict["file_name"],
                                 format=self.image_format)
        utils.check_image_size(dataset_dict, image)

        aug_input = T.AugInput(image, sem_seg=None)
        transforms = self.augmentations(aug_input)

        image = aug_input.image

        image_shape = image.shape[:2]
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)))

        if not self.is_train:
            dataset_dict.pop("annotations", None)
            return dataset_dict

        if "annotations" in dataset_dict:

            annos = [
                self.transform_instance_annotations_rotated(
                    obj, transforms, image_shape)
                # obj
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]

            instances = utils.annotations_to_instances_rotated(
                annos, image_shape)

            if self.recompute_boxes:
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)

        return dataset_dict
Example #14
def augment(im):
    input = T.AugInput(im)
    transform = augs(input)  # type: T.Transform
    x = input.image  # new image
    
    return x
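The snippet above assumes an `augs` augmentation list defined elsewhere; a plausible definition, sketched with commonly used detectron2 augmentations (the specific choice is an assumption, not taken from the original source):

import numpy as np
from detectron2.data import transforms as T

augs = T.AugmentationList([
    T.ResizeShortestEdge(short_edge_length=(640, 672, 704), max_size=1333, sample_style="choice"),
    T.RandomFlip(horizontal=True),
])

augmented = augment(np.zeros((480, 640, 3), dtype="uint8"))  # uses the `augment` function above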
Example #15
scallop_metadata = MetadataCatalog.get(P.DATASET_DIR + "train")

cfg = get_cfg()
if USE_SAVED_MODEL:
    cfg.merge_from_file('config.yml')
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, P.MODEL_PATH)
else:
    cfg.merge_from_file(
        "./detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
    )
    cfg.MODEL.WEIGHTS = "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"

if SHOW_INPUTS:
    for d in random.sample(dataset_dicts, 10):
        img = cv2.imread(d["file_name"])
        input = transforms.AugInput(img)
        transform = augs(input)
        image_transformed = input.image

        visualizer = Visualizer(img[:, :, ::-1],
                                metadata=scallop_metadata,
                                scale=0.5)
        vis = visualizer.draw_dataset_dict(d)
        img = vis.get_image()[:, :, ::-1]
        cv2.imshow("Original image", img)

        visualizer = Visualizer(image_transformed[:, :, ::-1],
                                metadata=scallop_metadata,
                                scale=0.5)
        vis = visualizer.draw_dataset_dict(d)
        image_transformed = vis.get_image()[:, :, ::-1]
Example #16
    def _desc_to_example(desc: Dict):
        # Detectron2 Model Input Format:
        # image: Tensor[C, H, W];
        # height, width: output height and width;
        # instances: Instances Object to training, with the following fields:
        #     "gt_boxes":
        #     "gt_classes":
        #     "gt_masks": a PolygonMasks or BitMasks object storing N masks, one for each instance.
        desc = copy.deepcopy(desc)  # it will be modified by code below
        image_path = os.path.join(images_dir, f'{desc["image_id"]}.jpg')
        # shape: [H, W, C]
        origin_image = detection_utils.read_image(image_path, format="BGR")
        oh, ow, oc = origin_height, origin_width, origin_channels = origin_image.shape

        if augmentations is not None:
            aug_input = T.AugInput(origin_image)
            transforms = augmentations(aug_input)
            auged_image = aug_input.image
        else:
            auged_image = origin_image
        ah, aw, ac = auged_height, auged_width, auged_channels = auged_image.shape

        if not is_train:
            return {
                "image_id": desc['image_id'],  # COCOEvaluator.process() needs it
                # expected shape: [C, H, W]
                "image": torch.as_tensor(
                    np.ascontiguousarray(auged_image.transpose(2, 0, 1))),
                "height": auged_height,
                "width": auged_width,
            }

        target = Instances(image_size=(ah, aw))
        if 'fill gt_boxes':
            # shape: n_box, 4
            boxes_abs = np.array(
                [anno['bbox'] for anno in desc['annotations']])
            if augmentations is not None:
                # clip transformed bbox to image size
                boxes_auged = transforms.apply_box(
                    np.array(boxes_abs)).clip(min=0)
                boxes_auged = np.minimum(
                    boxes_auged,
                    np.array([aw, ah, aw, ah])[np.newaxis, :])
            else:
                boxes_auged = boxes_abs
            target.gt_boxes = Boxes(boxes_auged)
        if 'fill gt_classes':
            classes = [anno['category_id'] for anno in desc['annotations']]
            classes = torch.tensor(classes, dtype=torch.int64)
            target.gt_classes = classes
        if 'fill gt_masks':
            mask_paths = [
                os.path.join(masks_dir, f'{anno["mask_id"]}.png')
                for anno in desc['annotations']
            ]
            masks = np.array([
                cv2.resize(cv2.imread(p, flags=cv2.IMREAD_GRAYSCALE),
                           dsize=(ow, oh)) for p in mask_paths
            ])
            if augmentations is not None:
                masks_auged = np.array(
                    [transforms.apply_segmentation(m) for m in masks])
            else:
                masks_auged = masks
            masks_auged = masks_auged > MASK_THRESHOLD
            masks_auged = BitMasks(
                torch.stack([
                    torch.from_numpy(np.ascontiguousarray(x))
                    for x in masks_auged
                ]))
            target.gt_masks = masks_auged

        return {
            "image_id": desc['image_id'],  # COCOEvaluator.process() needs it
            # expected shape: [C, H, W]
            "image": torch.as_tensor(
                np.ascontiguousarray(auged_image.transpose(2, 0, 1))),
            "height": auged_height,
            "width": auged_width,
            "instances": target,  # see annotations_to_instances()
        }
Example #17
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        image = utils.read_image(dataset_dict["file_name"],
                                 format=self.image_format)
        utils.check_image_size(dataset_dict, image)

        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            sem_seg_gt = utils.read_image(
                dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
        else:
            sem_seg_gt = None

        aug_input = T.AugInput(image, sem_seg=sem_seg_gt)
        transforms = self.augmentations(aug_input)
        image, sem_seg_gt = aug_input.image, aug_input.sem_seg

        image_shape = image.shape[:2]  # h, w
        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)))
        if sem_seg_gt is not None:
            dataset_dict["sem_seg"] = torch.as_tensor(
                sem_seg_gt.astype("long"))

        # USER: Remove if you don't use pre-computed proposals.
        # Most users would not need this feature.
        if self.proposal_topk is not None:
            utils.transform_proposals(dataset_dict,
                                      image_shape,
                                      transforms,
                                      proposal_topk=self.proposal_topk)

        if not self.is_train:
            # USER: Modify this if you want to keep them for some reason.
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.use_instance_mask:
                    anno.pop("segmentation", None)
                if not self.use_keypoint:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices)
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = annotations_to_instances_with_attributes(
                annos,
                image_shape,
                mask_format=self.instance_mask_format,
                load_attributes=self.attribute_on,
                max_attr_per_ins=self.max_attr_per_ins)

            # After transforms such as cropping are applied, the bounding box may no longer
            # tightly bound the object. As an example, imagine a triangle object
            # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
            # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
            # the intersection of original bounding box and the cropping box.
            if self.recompute_boxes:
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)
        return dataset_dict
Example #18
    def __call__(self, dataset_dict):
        '''
            Adapted from https://detectron2.readthedocs.io/_modules/detectron2/data/dataset_mapper.html#DatasetMapper
        '''
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        try:
            image = cv2.imdecode(
                np.frombuffer(
                    self.fileServer.getFile(dataset_dict["file_name"]),
                    np.uint8), -1)
            if self.image_format == 'RGB':
                # flip along spectral dimension
                if image.ndim >= 3:
                    image = np.flip(image, 2)
        except:
            #TODO: cannot handle corrupt data input here; needs to be done earlier
            print(
                'WARNING: Image {} is corrupt and could not be loaded.'.format(
                    dataset_dict["file_name"]))
            image = None
        # ORIGINAL: image = utils.read_image(dataset_dict["file_name"], format=self.image_format)

        utils.check_image_size(dataset_dict, image)

        # convert annotations from relative to XYXY absolute format if needed
        image_shape = image.shape[:2]

        if 'annotations' in dataset_dict:
            for anno in dataset_dict['annotations']:
                if 'bbox_mode' in anno and anno['bbox_mode'] in [
                        BoxMode.XYWH_REL, BoxMode.XYXY_REL
                ]:
                    if anno['bbox_mode'] == BoxMode.XYWH_REL:
                        anno['bbox'][0] -= anno['bbox'][2] / 2
                        anno['bbox'][1] -= anno['bbox'][3] / 2
                        anno['bbox'][2] += anno['bbox'][0]
                        anno['bbox'][3] += anno['bbox'][1]
                    anno['bbox'][0] *= image_shape[0]  #TODO: check order
                    anno['bbox'][1] *= image_shape[1]  #TODO: check order
                    anno['bbox'][2] *= image_shape[0]  #TODO: check order
                    anno['bbox'][3] *= image_shape[1]  #TODO: check order
                    anno['bbox_mode'] = BoxMode.XYXY_ABS

        if "segmentationMask" in dataset_dict:
            try:
                raster = np.frombuffer(base64.b64decode(
                    dataset_dict['segmentationMask']),
                                       dtype=np.uint8)
                sem_seg_gt = np.reshape(raster,
                                        image_shape)  #TODO: check format
                if self.classIndexMap is not None:
                    sem_seg_gt_copy = np.copy(sem_seg_gt)
                    for k, v in self.classIndexMap.items():
                        sem_seg_gt_copy[sem_seg_gt == k] = v
                    sem_seg_gt = sem_seg_gt_copy
            except:
                print(
                    'WARNING: Segmentation mask for image "{}" could not be loaded or decoded.'
                    .format(dataset_dict["file_name"]))
                sem_seg_gt = None
            # ORIGINAL: sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
        else:
            sem_seg_gt = None

        if "gt_label" in dataset_dict:
            dataset_dict["gt_label"] = torch.LongTensor(
                [dataset_dict["gt_label"]])

        aug_input = T.AugInput(image, sem_seg=sem_seg_gt)
        transforms = self.augmentations(aug_input)
        image, sem_seg_gt = aug_input.image, aug_input.sem_seg

        image_shape = image.shape[:2]  # h, w
        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)))
        if sem_seg_gt is not None:
            dataset_dict["sem_seg"] = torch.as_tensor(
                sem_seg_gt.astype("long"))

        if "annotations" in dataset_dict:
            annos = [
                utils.transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices)
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(
                annos, image_shape, mask_format=self.instance_mask_format)

            # After transforms such as cropping are applied, the bounding box may no longer
            # tightly bound the object. As an example, imagine a triangle object
            # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
            # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
            # the intersection of original bounding box and the cropping box.
            if self.recompute_boxes and len(instances) and hasattr(
                    instances, 'gt_masks'):
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(
                instances
            )  #TODO: do we want that? Maybe limit to width and height assignment to dict entry...
        return dataset_dict
Example #19
    def __call__(self, dataset_dict: dict):
        dataset_dict = deepcopy(dataset_dict)

        image = dutils.read_image(
            dataset_dict.get('file_name'), format=self.cfg.INPUT.FORMAT
        )
        mask = dutils.read_image(
            dataset_dict.pop('sem_seg_file_name'),
        )

        assert image.shape[:2] == mask.shape[:2]

        obj_ids = np.unique(mask)[1:]
        masks = mask == obj_ids[:, None, None]

        annotations = []
        for i in range(len(obj_ids)):
            pos = np.where(masks[i])
            box = (
                np.min(pos[1]),
                np.min(pos[0]),
                np.max(pos[1]),
                np.max(pos[0]),
            )
            annotations.append(
                {
                    'bbox': box,
                    'bbox_mode': 0,
                    'category_id': 0,
                    'segmentation': encode(
                        np.array(masks[i], dtype=np.uint8, order='F')
                    ),
                }
            )

        if not self.is_train:
            return dict(image=image, annotations=annotations)

        aug_input = T.AugInput(image, sem_seg=mask)
        transforms = aug_input.apply_augmentations(self._augmentation)
        image = torch.from_numpy(
            aug_input.image.transpose((2, 0, 1)).astype('float32')
        )
        mask = torch.from_numpy(aug_input.sem_seg.astype('float32'))

        annos = [
            dutils.transform_instance_annotations(
                annotation, transforms, image.shape[1:]
            )
            for annotation in annotations
        ]

        instances = dutils.annotations_to_instances(
            annos, image.shape[1:], mask_format=self.cfg.INPUT.MASK_FORMAT
        )
        # instances.gt_boxes = instances.gt_masks.get_bounding_boxes()

        dataset_dict['image'] = image
        dataset_dict['sem_seg'] = mask
        # dataset_dict['instances'] = instances[instances.gt_boxes.nonempty()]
        dataset_dict['instances'] = dutils.filter_empty_instances(instances)
        return dataset_dict