Example #1
    def __call__(self, dataset_dict):
        """
                Args:
                    dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

                Returns:
                    dict: a format that builtin models in detectron2 accept
                """
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        image = utils.read_image(dataset_dict["file_name"],
                                 format=self.image_format)
        utils.check_image_size(dataset_dict, image)

        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            sem_seg_gt = utils.read_image(
                dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
        else:
            sem_seg_gt = None

        aug_input = T.StandardAugInput(image, sem_seg=sem_seg_gt)
        transforms = aug_input.apply_augmentations(self.augmentations)
        image, sem_seg_gt = aug_input.image, aug_input.sem_seg

        image_shape = image.shape[:2]  # h, w
        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)))
        if sem_seg_gt is not None:
            dataset_dict["sem_seg"] = torch.as_tensor(
                sem_seg_gt.astype("long"))

        # USER: Remove if you don't use pre-computed proposals.
        # Most users would not need this feature.
        if self.proposal_topk is not None:
            utils.transform_proposals(dataset_dict,
                                      image_shape,
                                      transforms,
                                      proposal_topk=self.proposal_topk)

        if not self.is_train:
            # USER: Modify this if you want to keep them for some reason.
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.use_instance_mask:
                    anno.pop("segmentation", None)
                if not self.use_keypoint:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices)
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(
                annos, image_shape, mask_format=self.instance_mask_format)
            # @ Will Lee  Fine-grained category: non-standard, basically standard, or standard
            standard_ids = [obj["standard_id"] for obj in annos]
            standard_ids = torch.tensor(standard_ids, dtype=torch.int64)
            instances.gt_standards = standard_ids

            # After transforms such as cropping are applied, the bounding box may no longer
            # tightly bound the object. As an example, imagine a triangle object
            # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
            # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
            # the intersection of original bounding box and the cropping box.
            if self.recompute_boxes:
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)
        return dataset_dict
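A minimal sketch (not part of the example above) of how such a custom mapper is typically wired into training. The class name MyDatasetMapper is a hypothetical stand-in for whatever class owns this __call__; build_detection_train_loader is the standard detectron2 entry point:

from detectron2.data import build_detection_train_loader

def build_train_loader(cfg):
    # Assumes the mapper's constructor takes (cfg, is_train) like the
    # builtin detectron2 DatasetMapper; adjust to the actual signature.
    mapper = MyDatasetMapper(cfg, is_train=True)
    return build_detection_train_loader(cfg, mapper=mapper)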
Example #2
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        if "annotations" not in dataset_dict:
            image, transforms = T.apply_transform_gens(
                ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image
            )
        else:
            # Crop around an instance if there are instances in the image.
            # USER: Remove if you don't use cropping
            if self.crop_gen:
                crop_tfm = utils.gen_crop_transform_with_instance(
                    self.crop_gen.get_crop_size(image.shape[:2]),
                    image.shape[:2],
                    np.random.choice(dataset_dict["annotations"]),
                )
                image = crop_tfm.apply_image(image)
            image, transforms = T.apply_transform_gens(self.tfm_gens, image)
            if self.crop_gen:
                transforms = crop_tfm + transforms

        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

        # USER: Remove if you don't use pre-computed proposals.
        # Most users would not need this feature.
        if self.load_proposals:
            utils.transform_proposals(
                dataset_dict,
                image_shape,
                transforms,
                proposal_topk=self.proposal_topk,
                min_box_size=self.proposal_min_box_size,
            )

        # HACK Keep annotations for test
        # if not self.is_train:
        #     # USER: Modify this if you want to keep them for some reason.
        #     dataset_dict.pop("annotations", None)
        #     dataset_dict.pop("sem_seg_file_name", None)
        #     return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.mask_on:
                    anno.pop("segmentation", None)
                if not self.keypoint_on:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(
                    obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
                )
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(
                annos, image_shape, mask_format=self.mask_format
            )
            # Create a tight bounding box from masks, useful when image is cropped
            if self.crop_gen and instances.has("gt_masks"):
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)

        return dataset_dict
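Example #2 assumes self.crop_gen and self.tfm_gens were created in __init__. A sketch of the usual construction under the older transform-gen API these examples target (build_transform_gen was later renamed build_augmentation in detectron2):

from detectron2.data import transforms as T
from detectron2.data import detection_utils as utils

def build_gens(cfg, is_train):
    # Optional instance-aware cropping, controlled by the standard config keys.
    crop_gen = (T.RandomCrop(cfg.INPUT.CROP.TYPE, cfg.INPUT.CROP.SIZE)
                if cfg.INPUT.CROP.ENABLED and is_train else None)
    # Default resize (plus random flip during training).
    tfm_gens = utils.build_transform_gen(cfg, is_train)
    return crop_gen, tfm_gens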
Example #3
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        image = utils.read_image(dataset_dict["file_name"],
                                 format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        ### my code ###
        ## Stash the segmentation annotations for now and add them back afterwards
        seg_bk = [
            dictwk["segmentation"] for dictwk in dataset_dict["annotations"]
        ]
        for i in range(len(dataset_dict["annotations"])):
            dataset_dict["annotations"][i].pop("segmentation")
        image, dataset_dict = self.aug_handler(
            image=image, dataset_dict_detectron=dataset_dict)
        for i in range(len(dataset_dict["annotations"])):
            dataset_dict["annotations"][i]["segmentation"] = seg_bk[i]
        ### my code ###

        if "annotations" not in dataset_dict:
            image, transforms = T.apply_transform_gens(
                ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens,
                image)
        else:
            # Crop around an instance if there are instances in the image.
            # USER: Remove if you don't use cropping
            if self.crop_gen:
                crop_tfm = utils.gen_crop_transform_with_instance(
                    self.crop_gen.get_crop_size(image.shape[:2]),
                    image.shape[:2],
                    np.random.choice(dataset_dict["annotations"]),
                )
                image = crop_tfm.apply_image(image)
            image, transforms = T.apply_transform_gens(self.tfm_gens, image)
            if self.crop_gen:
                transforms = crop_tfm + transforms

        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)))

        # USER: Remove if you don't use pre-computed proposals.
        if self.load_proposals:
            utils.transform_proposals(dataset_dict, image_shape, transforms,
                                      self.min_box_side_len,
                                      self.proposal_topk)

        if not self.is_train:
            # USER: Modify this if you want to keep them for some reason.
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.mask_on:
                    anno.pop("segmentation", None)
                if not self.keypoint_on:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices)
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(
                annos, image_shape, mask_format=self.mask_format)
            # Create a tight bounding box from masks, useful when image is cropped
            if self.crop_gen and instances.has("gt_masks"):
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)

        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            with PathManager.open(dataset_dict.pop("sem_seg_file_name"),
                                  "rb") as f:
                sem_seg_gt = Image.open(f)
                sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
            sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
            sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
            dataset_dict["sem_seg"] = sem_seg_gt
        return dataset_dict
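The stash-and-restore dance around self.aug_handler generalizes to any annotation key an external augmenter cannot handle. A standalone sketch, assuming the handler preserves the number and order of annotations (as the code above already relies on):

def call_with_stashed_keys(handler, image, dataset_dict, keys=("segmentation",)):
    # Pop the keys the handler does not understand...
    stash = [{k: anno.pop(k) for k in keys if k in anno}
             for anno in dataset_dict["annotations"]]
    image, dataset_dict = handler(image=image, dataset_dict_detectron=dataset_dict)
    # ...and put them back afterwards, relying on stable annotation order.
    for anno, saved in zip(dataset_dict["annotations"], stash):
        anno.update(saved)
    return image, dataset_dict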
Example #4
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
        utils.check_image_size(dataset_dict, image)

#         ################################################################################################################
#         print("AutoAugDet:", dataset_dict["file_name"])
#         h, w, c = image.shape
#         if h <= 0 or w <=0:
#             print("Empty image")
#         if self.autoaugdet and "annotations" in dataset_dict:
#             from detectron2.structures.boxes import BoxMode
#             bboxes = []
#             for label in dataset_dict["annotations"]:
#                 assert label['bbox_mode'] == BoxMode.XYWH_ABS
#                 bboxes.append(label['bbox'])
#             # import cv2, random
#             # showimg_in = image.copy()
#             # for box in bboxes:
#             #     cv2.rectangle(showimg_in, (int(box[0]), int(box[1])), (int(box[0] + box[2]), int(box[1] + box[3])),(random.randint(0,255), random.randint(0,255), random.randint(0,255)))
#             try:
#                 image, bboxes = autoaugdet.autoaugdet(image, bboxes, self.autoaugdet)
#             except Exception as  e:
#                 print("AutoAug Error:", e)
#             # showimg_out = image.copy()
#             # for box in bboxes:
#             #     cv2.rectangle(showimg_out, (int(box[0]), int(box[1])), (int(box[0] + box[2]), int(box[1] + box[3])),(random.randint(0,255), random.randint(0,255), random.randint(0,255)))
#             # cv2.imshow("in", showimg_in)
#             # cv2.imshow("out", showimg_out)
#             # cv2.waitKey(0)
#             for i in range(len(bboxes)):
#                 dataset_dict["annotations"][i]['bbox'] = bboxes[i]

#         #################################################################################################       
        
        if "annotations" not in dataset_dict:
            image, transforms = T.apply_transform_gens(
                ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image
            )
        else:
            # Crop around an instance if there are instances in the image.
            # USER: Remove if you don't use cropping
            if self.crop_gen:
                crop_tfm = utils.gen_crop_transform_with_instance(
                    self.crop_gen.get_crop_size(image.shape[:2]),
                    image.shape[:2],
                    np.random.choice(dataset_dict["annotations"]),
                )
                image = crop_tfm.apply_image(image)

            image, transforms = T.apply_transform_gens(self.tfm_gens, image)

            if self.crop_gen:
                transforms = crop_tfm + transforms

        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
        # Can use uint8 if it turns out to be slow some day

        # USER: Remove if you don't use pre-computed proposals.
        if self.load_proposals:
            utils.transform_proposals(
                dataset_dict, image_shape, transforms, self.min_box_side_len, self.proposal_topk
            )

        if not self.is_train:
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.mask_on:
                    anno.pop("segmentation", None)
                if not self.keypoint_on:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(
                    obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
                )
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(
                annos, image_shape, mask_format=self.mask_format
            )
            # Create a tight bounding box from masks, useful when image is cropped
            if self.crop_gen and instances.has("gt_masks"):
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
                
            dataset_dict["instances"] = utils.filter_empty_instances(instances)
            
            # generate heatmaps of keypoints
            #if dataset_dict["instances"].has("gt_keypoints"):
            #
            
            # For segmentation-based detection, turn the instance-level
            # segmentation masks into semantic segmentation maps and contour maps.
            dataset_dict["contours"], dataset_dict["semseg"] = utils.annotations_to_segmaps(annos, self.num_classes, image_shape)
            
            kpts = [obj.get("keypoints", []) for obj in annos]
            map_shape = (image_shape[0], image_shape[1])
            kp_maps, short_offsets = get_keypoint_maps(None, kpts, map_shape)
            dataset_dict["kp_maps"] = kp_maps.transpose(2, 0, 1)
            dataset_dict["short_offsets"] = short_offsets.transpose(2, 0, 1)
            
            ################################################################
#             # visualize the keypoints
#             from detectron2.utils.visualizer import Visualizer
#             from detectron2.data.datasets.builtin_meta import COCO_CATEGORIES
#             from os import path

#             image_rgb = image[..., ::-1]
#             V = Visualizer(image_rgb, dataset_dict)
#             # draw the foreground mask of each object category
#             binary_masks = kp_maps>0.1
#             _, fn = path.split(dataset_dict["file_name"])
#             fn_next, ext = path.splitext(fn)
#             print('Mask size: ', binary_masks.shape)
#             print('Image size: ', image_rgb.shape)
#             assert binary_masks.shape[1]==image_rgb.shape[0], (binary_masks.shape[1], image_rgb.shape[0])
#             assert binary_masks.shape[2]==image_rgb.shape[1], (binary_masks.shape[2], image_rgb.shape[1])
#             assert image_rgb.shape[2]==3, image_rgb.shape[2]
#             bm = binary_masks

#             for i in range(binary_masks.shape[0]):
#                 masked_image = V.draw_binary_mask(
#                     bm[i, :, :].squeeze(), color=None, edge_color='r', alpha=0.5, area_threshold=10
#                 ) # COCO_CATEGORIES[i]["color"]
# #                 filepath = "tmp/" + fn_next + '_' + COCO_CATEGORIES[i]["name"] + '.png'
# #                 masked_image.save(filepath)
#             filepath = "tmp/" + fn_next + '.png'
#             masked_image.save(filepath)
            ################################################################
            
            ################################################
#             # visualize the segmentation mask
#             from os import path
#             image_rgb = image[..., ::-1]  #utils.read_image(dataset_dict["file_name"], format="RGB")
#             segmask = dataset_dict["semseg"].tensor.numpy()
#             _, fn = path.split(dataset_dict["file_name"])
#             fn_next, ext = path.splitext(fn)
            
#             im = Image.fromarray(np.uint8(image_rgb))
#             filepath = "tmp_segmap_sorted/" + fn_next + '_raw.png'
#             im.save(filepath)
            
#             im2 = Image.fromarray(np.uint8(segmask*3))
#             filepath2 = "tmp_segmap_sorted/" + fn_next + '_seg.png'
#             im2.save(filepath2)
            
            ################################################
            
            ###############
#             # visualize the segmentation map and contours
#             from detectron2.utils.visualizer import Visualizer
#             from detectron2.data.datasets.builtin_meta import COCO_CATEGORIES
#             from os import path
#             #V.draw_sem_seg(self, sem_seg, area_threshold=None, alpha=0.8)            
#             image_rgb = image[..., ::-1]  #utils.read_image(dataset_dict["file_name"], format="RGB")
#             V = Visualizer(image_rgb, dataset_dict)
#             # draw the foreground mask of each object category
#             #binary_masks = dataset_dict["contours"].gt_segmasks.tensor
#             binary_masks = dataset_dict["contours"].gt_contours.tensor
#             _, fn = path.split(dataset_dict["file_name"])
#             fn_next, ext = path.splitext(fn)
#             print('Mask size: ', binary_masks.size())
#             print('Image size: ', image_rgb.shape)
#             assert binary_masks.size(1)==image_rgb.shape[0], (binary_masks.size(1), image_rgb.shape[0])
#             assert binary_masks.size(2)==image_rgb.shape[1], (binary_masks.size(2), image_rgb.shape[1])
#             assert image_rgb.shape[2]==3, image_rgb.shape[2]
#             bm = binary_masks.numpy()
# #             bm_uint8 = bm.astype("uint8")
# #             print(bm)
#             for i in range(binary_masks.size(0)):
#                 masked_image = V.draw_binary_mask(
#                     bm[i, :, :].squeeze(), color=None, edge_color='r', alpha=0.5, area_threshold=10
#                 ) # COCO_CATEGORIES[i]["color"]
# #                 filepath = "tmp/" + fn_next + '_' + COCO_CATEGORIES[i]["name"] + '.png'
# #                 masked_image.save(filepath)
#             filepath = "tmp/" + fn_next + '.png'
#             masked_image.save(filepath)
            
################################################################################################# 

        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f:
                sem_seg_gt = Image.open(f)
                sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
            sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
            sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
            dataset_dict["sem_seg"] = sem_seg_gt
            
        return dataset_dict
Example #5
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        try:
            image = utils.read_image(dataset_dict["file_name"],
                                     format=self.img_format)
        except Exception as e:
            print(dataset_dict["file_name"])
            print(e)
            raise e
        try:
            utils.check_image_size(dataset_dict, image)
        except SizeMismatchError as e:
            expected_wh = (dataset_dict["width"], dataset_dict["height"])
            image_wh = (image.shape[1], image.shape[0])
            if (image_wh[1], image_wh[0]) == expected_wh:
                print("transposing image {}".format(dataset_dict["file_name"]))
                image = image.transpose(1, 0, 2)
            else:
                raise e

        if "annotations" not in dataset_dict or len(
                dataset_dict["annotations"]) == 0:
            image, transforms = T.apply_transform_gens(
                ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens,
                image)
        else:
            # Crop around an instance if there are instances in the image.
            # USER: Remove if you don't use cropping
            if self.crop_gen:
                crop_tfm = gen_crop_transform_with_instance(
                    self.crop_gen.get_crop_size(image.shape[:2]),
                    image.shape[:2],
                    dataset_dict["annotations"],
                    crop_box=self.crop_box,
                )
                image = crop_tfm.apply_image(image)
            image, transforms = T.apply_transform_gens(self.tfm_gens, image)
            if self.crop_gen:
                transforms = crop_tfm + transforms

        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(
            image.transpose(2, 0, 1).astype("float32"))
        # Can use uint8 if it turns out to be slow some day

        # USER: Remove if you don't use pre-computed proposals.
        if self.load_proposals:
            utils.transform_proposals(dataset_dict, image_shape, transforms,
                                      self.min_box_side_len,
                                      self.proposal_topk)

        if not self.is_train:
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            dataset_dict.pop("pano_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.mask_on:
                    anno.pop("segmentation", None)
                if not self.keypoint_on:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices)
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = annotations_to_instances(annos,
                                                 image_shape,
                                                 mask_format=self.mask_format)
            # Create a tight bounding box from masks, useful when image is cropped
            if self.crop_gen and instances.has("gt_masks"):
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)

        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            with PathManager.open(dataset_dict.pop("sem_seg_file_name"),
                                  "rb") as f:
                sem_seg_gt = Image.open(f)
                sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
            sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
            sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
            dataset_dict["sem_seg"] = sem_seg_gt

        if self.basis_loss_on and self.is_train:
            # load basis supervisions
            if self.ann_set == "coco":
                basis_sem_path = dataset_dict["file_name"].replace(
                    'train2017',
                    'thing_train2017').replace('image/train', 'thing_train')
            else:
                basis_sem_path = dataset_dict["file_name"].replace(
                    'coco',
                    'lvis').replace('train2017',
                                    'thing_train').replace('jpg', 'npz')
            basis_sem_path = basis_sem_path.replace('jpg', 'npz')
            basis_sem_gt = np.load(basis_sem_path)["mask"]
            basis_sem_gt = transforms.apply_segmentation(basis_sem_gt)
            basis_sem_gt = torch.as_tensor(basis_sem_gt.astype("long"))
            dataset_dict["basis_sem"] = basis_sem_gt
        return dataset_dict
Example #6
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        image = utils.read_image(dataset_dict["file_name"],
                                 format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        if "annotations" not in dataset_dict:
            image, transforms = T.apply_transform_gens(
                ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens,
                image)
        else:
            # Crop around an instance if there are instances in the image.
            # USER: Remove if you don't use cropping
            if self.crop_gen:
                crop_tfm = utils.gen_crop_transform_with_instance(
                    self.crop_gen.get_crop_size(image.shape[:2]),
                    image.shape[:2],
                    np.random.choice(dataset_dict["annotations"]),
                )
                image = crop_tfm.apply_image(image)
            image, transforms = T.apply_transform_gens(self.tfm_gens, image)
            if self.crop_gen:
                transforms = crop_tfm + transforms

        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(
            image.transpose(2, 0, 1).astype("float32"))
        # Can use uint8 if it turns out to be slow some day

        # USER: Remove if you don't use pre-computed proposals.
        if self.load_proposals:
            utils.transform_proposals(dataset_dict, image_shape, transforms,
                                      self.min_box_side_len,
                                      self.proposal_topk)

        if not self.is_train:
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for annotation in dataset_dict["annotations"]:
                if not self.mask_on:
                    annotation.pop("segmentation", None)
                if not self.keypoint_on:
                    annotation.pop("keypoints", None)
                if not self.fiberwidth_on:
                    annotation.pop("fiberwidth", None)
                if not self.fiberlength_on:
                    annotation.pop("fiberlength", None)

            annotations = [
                obj for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]

            num_keypoints = self.cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS
            annotations = [
                transformation.interpolate_keypoints(obj, num_keypoints)
                for obj in annotations
            ]

            annotations = [
                utils.transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices)
                for obj in annotations
            ]

            annotations = [
                transformation.transform_instance_keypoint_order(
                    obj, self.cfg) for obj in annotations
            ]

            instances = utils.annotations_to_instances(
                annotations, image_shape, mask_format=self.mask_format)

            if len(annotations
                   ) and "fiberwidth" in annotations[0] and self.fiberwidth_on:
                gt_fiberwidth = torch.tensor(
                    [obj["fiberwidth"] for obj in annotations])
                instances.gt_fiberwidth = gt_fiberwidth

            if len(annotations) and "fiberlength" in annotations[
                    0] and self.fiberlength_on:
                gt_fiberlength = torch.tensor(
                    [obj["fiberlength"] for obj in annotations])
                instances.gt_fiberlength = gt_fiberlength

            # Create a tight bounding box from masks, useful when image is cropped
            if self.crop_gen and instances.has("gt_masks"):
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)

        return dataset_dict
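Example #6 shows the general recipe for extra per-instance targets: Instances accepts arbitrary fields as long as every field has the same length. A minimal self-contained sketch with made-up values:

import torch
from detectron2.structures import Boxes, Instances

instances = Instances((480, 640))  # (height, width) of the image
instances.gt_boxes = Boxes(torch.tensor([[10.0, 10.0, 50.0, 80.0]]))
instances.gt_classes = torch.tensor([0], dtype=torch.int64)
instances.gt_fiberwidth = torch.tensor([3.5])  # one custom value per instance
assert len(instances) == 1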
Example #7
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        #print("BELOW IS THE dataset_dict (FOR DEBUGGING)")
        #print(dataset_dict)
        # USER: Write your own image loading if it's not from a file
        try:
            image = utils.read_image(
                dataset_dict["file_name"], format=self.image_format
            )
        except Exception as e:
            print(dataset_dict["file_name"])
            print(e)
            raise e
        try:
            utils.check_image_size(dataset_dict, image)
        except SizeMismatchError as e:
            expected_wh = (dataset_dict["width"], dataset_dict["height"])
            image_wh = (image.shape[1], image.shape[0])
            if (image_wh[1], image_wh[0]) == expected_wh:
                print("transposing image {}".format(dataset_dict["file_name"]))
                image = image.transpose(1, 0, 2)
            else:
                raise e

        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            sem_seg_gt = utils.read_image(
                dataset_dict.pop("sem_seg_file_name"), "L"
            ).squeeze(2)
        else:
            sem_seg_gt = None

        boxes = np.asarray(
            [
                BoxMode.convert(
                    instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS
                )
                for instance in dataset_dict["annotations"]
            ]
        )
        aug_input = T.StandardAugInput(image, boxes=boxes, sem_seg=sem_seg_gt)
        transforms = aug_input.apply_augmentations(self.augmentation)
        image, sem_seg_gt = aug_input.image, aug_input.sem_seg

        image_shape = image.shape[:2]  # h, w
        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1))
        )
        if sem_seg_gt is not None:
            dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

        # USER: Remove if you don't use pre-computed proposals.
        # Most users would not need this feature.
        if self.proposal_topk:
            utils.transform_proposals(
                dataset_dict,
                image_shape,
                transforms,
                proposal_topk=self.proposal_topk,
                min_box_size=self.proposal_min_box_size,
            )

        if not self.is_train:
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            dataset_dict.pop("pano_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.use_instance_mask:
                    anno.pop("segmentation", None)
                if not self.use_keypoint:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices,
                )
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = annotations_to_instances(
                annos, image_shape, mask_format=self.instance_mask_format
            )

            # After transforms such as cropping are applied, the bounding box may no longer
            # tightly bound the object. As an example, imagine a triangle object
            # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
            # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
            # the intersection of original bounding box and the cropping box.
            if self.recompute_boxes:
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)

        if self.basis_loss_on and self.is_train:
            # load basis supervisions
            if self.ann_set == "coco":
                basis_sem_path = (
                    dataset_dict["file_name"]
                    .replace("train2017", "thing_train2017")
                    .replace("image/train", "thing_train")
                )
            else:
                basis_sem_path = (
                    dataset_dict["file_name"]
                    .replace("coco", "lvis")
                    .replace("train2017", "thing_train")
                )
            # change extension to npz
            basis_sem_path = osp.splitext(basis_sem_path)[0] + ".npz"
            basis_sem_gt = np.load(basis_sem_path)["mask"]
            basis_sem_gt = transforms.apply_segmentation(basis_sem_gt)
            basis_sem_gt = torch.as_tensor(basis_sem_gt.astype("long"))
            dataset_dict["basis_sem"] = basis_sem_gt
        return dataset_dict
Example #8
    def __call__(self, dataset_dict):
        dataset_dict = copy.deepcopy(dataset_dict)
        image = utils.read_image(dataset_dict["file_name"],
                                 format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        # here you can add your own code to modify the input image

        if "annotations" not in dataset_dict:
            image, transforms = T.apply_transform_gens(
                ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens,
                image)
        else:
            if self.crop_gen:
                crop_tfm = utils.gen_crop_transform_with_instance(
                    self.crop_gen.get_crop_size(image.shape[:2]),
                    image.shape[:2],
                    np.random.choice(dataset_dict["annotations"]),
                )
                image = crop_tfm.apply_image(image)
            image, transforms = T.apply_transform_gens(self.tfm_gens, image)
            if self.crop_gen:
                transforms = crop_tfm + transforms

        image_shape = image.shape[:2]
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)))

        if self.load_proposals:
            utils.transform_proposals(dataset_dict, image_shape, transforms,
                                      self.min_box_side_len,
                                      self.proposal_topk)

        if not self.is_train:
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            for anno in dataset_dict["annotations"]:
                if not self.mask_on:
                    anno.pop("segmentation", None)
                if not self.keypoint_on:
                    anno.pop("keypoints", None)
                if not self.attribute_on:
                    anno.pop("attribute_ids")

            annos = [
                utils.transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices)
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = annotations_to_instances_with_attributes(
                annos,
                image_shape,
                mask_format=self.mask_format,
                load_attributes=self.attribute_on,
                max_attr_per_ins=self.max_attr_per_ins)
            if self.crop_gen and instances.has("gt_masks"):
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)

        if "sem_seg_file_name" in dataset_dict:
            with PathManager.open(dataset_dict.pop("sem_seg_file_name"),
                                  "rb") as f:
                sem_seg_gt = Image.open(f)
                sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
            sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
            sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
            dataset_dict["sem_seg"] = sem_seg_gt
        return dataset_dict
Example #9
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
        try:
            utils.check_image_size(dataset_dict, image)
        except Exception as e:
            print(e)
            import moxing as mox
            mox.file.copy_parallel(dataset_dict["file_name"],
                                   's3://bucket-6756/liangxiwen/result/haitian_semi/unbiased-teacher/wrong_imgs/' +
                                   dataset_dict["file_name"].split('/')[-1])
            print(image.shape)
            image = np.rot90(image)
            print(image.shape)
            utils.check_image_size(dataset_dict, image)

        if "sem_seg_file_name" in dataset_dict:
            sem_seg_gt = utils.read_image(
                dataset_dict.pop("sem_seg_file_name"), "L"
            ).squeeze(2)
        else:
            sem_seg_gt = None

        aug_input = T.StandardAugInput(image, sem_seg=sem_seg_gt)
        transforms = aug_input.apply_augmentations(self.augmentation)
        image_weak_aug, sem_seg_gt = aug_input.image, aug_input.sem_seg
        image_shape = image_weak_aug.shape[:2]  # h, w

        if sem_seg_gt is not None:
            dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

        if self.load_proposals:
            utils.transform_proposals(
                dataset_dict,
                image_shape,
                transforms,
                proposal_topk=self.proposal_topk,
                min_box_size=self.proposal_min_box_size,
            )

        if not self.is_train:
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            for anno in dataset_dict["annotations"]:
                if not self.mask_on:
                    anno.pop("segmentation", None)
                if not self.keypoint_on:
                    anno.pop("keypoints", None)

            annos = [
                utils.transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices,
                )
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(
                annos, image_shape, mask_format=self.mask_format
            )

            if self.compute_tight_boxes and instances.has("gt_masks"):
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()

            bboxes_d2_format = utils.filter_empty_instances(instances)
            dataset_dict["instances"] = bboxes_d2_format

        # apply strong augmentation
        # We use torchvision augmentation, which is not compatible with
        # detectron2 (it uses numpy arrays for images), so we need to
        # convert to PIL format first.
        image_pil = Image.fromarray(image_weak_aug.astype("uint8"), "RGB")
        image_strong_aug = np.array(self.strong_augmentation(image_pil))
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image_strong_aug.transpose(2, 0, 1))
        )

        dataset_dict_key = copy.deepcopy(dataset_dict)
        dataset_dict_key["image"] = torch.as_tensor(
            np.ascontiguousarray(image_weak_aug.transpose(2, 0, 1))
        )
        assert dataset_dict["image"].size(1) == dataset_dict_key["image"].size(1)
        assert dataset_dict["image"].size(2) == dataset_dict_key["image"].size(2)
        return (dataset_dict, dataset_dict_key)
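The strong augmentation above (self.strong_augmentation) is applied to a PIL image, so in setups like unbiased-teacher it is typically a torchvision Compose of photometric-only ops; keeping it photometric is what lets the instances computed from the weakly augmented image stay valid for the strongly augmented one. A sketch using only standard torchvision transforms:

from torchvision import transforms

strong_augmentation = transforms.Compose([
    transforms.RandomApply([transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8),
    transforms.RandomGrayscale(p=0.2),
    # GaussianBlur requires torchvision >= 0.8
    transforms.RandomApply([transforms.GaussianBlur(5, sigma=(0.1, 2.0))], p=0.5),
])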
Example #10
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        try:
            image = utils.read_image(dataset_dict["file_name"],
                                     format=self.img_format)
        except Exception as e:
            print(dataset_dict["file_name"])
            print(e)
            raise e
        try:
            utils.check_image_size(dataset_dict, image)
        except SizeMismatchError as e:
            expected_wh = (dataset_dict["width"], dataset_dict["height"])
            image_wh = (image.shape[1], image.shape[0])
            if (image_wh[1], image_wh[0]) == expected_wh:
                print("transposing image {}".format(dataset_dict["file_name"]))
                image = image.transpose(1, 0, 2)
            else:
                raise e

        if "annotations" not in dataset_dict or len(
                dataset_dict["annotations"]) == 0:
            image, transforms = T.apply_augmentations(
                ([self.crop] if self.crop else []) + self.augmentation, image)
        else:
            # Crop around an instance if there are instances in the image.
            # USER: Remove if you don't use cropping
            if self.crop:
                crop_tfm = gen_crop_transform_with_instance(
                    self.crop.get_crop_size(image.shape[:2]),
                    image.shape[:2],
                    dataset_dict["annotations"],
                    crop_box=self.crop_box,
                )
                image = crop_tfm.apply_image(image)
            try:
                image, transforms = T.apply_augmentations(
                    self.augmentation, image)
            except ValueError as e:
                print(dataset_dict["file_name"])
                raise e
            if self.crop:
                transforms = crop_tfm + transforms

        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)))

        # USER: Remove if you don't use pre-computed proposals.
        # Most users would not need this feature.
        if self.load_proposals:
            utils.transform_proposals(
                dataset_dict,
                image_shape,
                transforms,
                proposal_topk=self.proposal_topk,
                min_box_size=self.proposal_min_box_size,
            )

        if not self.is_train:
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            dataset_dict.pop("pano_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.mask_on:
                    anno.pop("segmentation", None)
                if not self.keypoint_on:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices)
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = annotations_to_instances(annos,
                                                 image_shape,
                                                 mask_format=self.mask_format)

            # After transforms such as cropping are applied, the bounding box may no longer
            # tightly bound the object. As an example, imagine a triangle object
            # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
            # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
            # the intersection of original bounding box and the cropping box.
            if self.crop and instances.has("gt_masks"):
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)

        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            sem_seg_gt = utils.read_image(
                dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
            sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
            sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
            dataset_dict["sem_seg"] = sem_seg_gt

        if self.basis_loss_on and self.is_train:
            # load basis supervisions
            if self.ann_set == "coco":
                basis_sem_path = dataset_dict["file_name"].replace(
                    'train2017',
                    'thing_train2017').replace('image/train', 'thing_train')
            else:
                basis_sem_path = dataset_dict["file_name"].replace(
                    'coco', 'lvis').replace('train2017', 'thing_train')
            # change extension to npz
            basis_sem_path = osp.splitext(basis_sem_path)[0] + ".npz"
            basis_sem_gt = np.load(basis_sem_path)["mask"]
            basis_sem_gt = transforms.apply_segmentation(basis_sem_gt)
            basis_sem_gt = torch.as_tensor(basis_sem_gt.astype("long"))
            dataset_dict["basis_sem"] = basis_sem_gt
        return dataset_dict
Example #11
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        image = self.load_image(dataset_dict)
        prob = rand_range()
        if self.is_train:
            # apply at most one of mosaic and mixup
            try:
                if prob <= self.mosaic_prob:
                    image, dataset_dict = self.load_mosaic(image, dataset_dict)
                elif prob <= self.mixup_prob:
                    image, dataset_dict = self.load_mixup(image, dataset_dict)
            except Exception as e:
                print("mosaic or mixup augmentation error!!", e)

            # apply cutout
            # if random.random() < self.cutout_prob:
            #     image, dataset_dict = cutout(image, dataset_dict)

        # apply albumentations transform
        if self.img_format == "BGR":
            image = image[..., ::-1]  # albumentations expects RGB input
        # one of [hsv, brightness_contrast]
        # if self.is_train:
        #     if random.random() < 0.5:
        #         image = augment_hsv(image, 0.014, 0.68, 0.36)       # yolov5 hyp
        #     else:
        #         image = augment_brightness_contrast(image)

        augment_anno = {"image": image, "bboxes": [], "category_id": []}
        if "annotations" in dataset_dict:
            augment_anno["bboxes"] = [
                x['bbox'] for x in dataset_dict["annotations"]
            ]
            augment_anno["category_id"] = [
                x['category_id'] for x in dataset_dict["annotations"]
            ]
        # do augmentation
        augment_anno = self._albumentations_tfm(**augment_anno)

        image = augment_anno["image"]
        if self.img_format == "BGR":
            image = image[..., ::-1]  # convert back to BGR
        if len(augment_anno["bboxes"]) > 0:
            dataset_dict["annotations"] = [{
                "category_id": category_id,
                "bbox": bbox,
                "iscrowd": 0,
                "area": bbox[2] * bbox[3],
                "bbox_mode": BoxMode.XYWH_ABS,
            } for bbox, category_id in zip(augment_anno["bboxes"],
                                           augment_anno["category_id"])]
        else:
            dataset_dict["annotations"] = []

        # apply detectron2 transform
        if "annotations" not in dataset_dict:
            image, transforms = T.apply_transform_gens(
                ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens,
                image)
        else:
            # Crop around an instance if there are instances in the image.
            # USER: Remove if you don't use cropping
            if self.crop_gen:
                crop_tfm = utils.gen_crop_transform_with_instance(
                    self.crop_gen.get_crop_size(image.shape[:2]),
                    image.shape[:2],
                    np.random.choice(dataset_dict["annotations"]),
                )
                image = crop_tfm.apply_image(image)
            # only transform image
            image, transforms = T.apply_transform_gens(self.tfm_gens, image)
            if self.crop_gen:
                transforms = crop_tfm + transforms

        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)))

        # USER: Remove if you don't use pre-computed proposals.
        if self.load_proposals:
            utils.transform_proposals(dataset_dict, image_shape, transforms,
                                      self.min_box_side_len,
                                      self.proposal_topk)

        # if not self.is_train:
        #     # USER: Modify this if you want to keep them for some reason.
        #     dataset_dict.pop("annotations", None)
        #     dataset_dict.pop("sem_seg_file_name", None)
        #     return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.mask_on:
                    anno.pop("segmentation", None)
                if not self.keypoint_on:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            # transform only the annotations; the image was already transformed above
            annos = [
                utils.transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices)
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(
                annos, image_shape, mask_format=self.mask_format)
            # Create a tight bounding box from masks, useful when image is cropped
            if self.crop_gen and instances.has("gt_masks"):
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)

        return dataset_dict
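
A note on the albumentations round-trip above: the mapper feeds `bboxes` and `category_id` keyword arguments into `self._albumentations_tfm` and rebuilds the result as XYWH_ABS annotations, which matches albumentations' "coco" box format ([x_min, y_min, width, height]). The concrete pipeline is not shown in this listing; a minimal sketch of how such a transform might be built, assuming the standard albumentations API (the ops chosen are illustrative):

import albumentations as A

def build_albumentations_tfm():
    # Hypothetical pipeline; the actual ops are an assumption. The key part
    # is bbox_params: "coco" boxes are [x, y, w, h] in absolute pixels, and
    # label_fields must match the "category_id" key used by the mapper.
    return A.Compose(
        [A.HorizontalFlip(p=0.5), A.RandomBrightnessContrast(p=0.2)],
        bbox_params=A.BboxParams(format="coco", label_fields=["category_id"]),
    )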
Example #12
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in CenterNet Dataset format.

        Returns:
            dict: a format that builtin models in CenterNet accept
        """
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        image = utils.read_image(dataset_dict["file_name"],
                                 format=self.img_format)
        utils.check_image_size(dataset_dict, image)
        # Debug: draw the annotation boxes on the image
        # for anno in dataset_dict['annotations']:
        #     bbox = anno['bbox']
        #     import cv2
        #     cv2.rectangle(image,
        #                   (int(bbox[0]), int(bbox[1])),
        #                   (int(bbox[2]), int(bbox[3])),
        #                   (0, 255, 0),
        #                   2)
        # cv2.imwrite('result.jpg',image)
        # import pdb;
        # pdb.set_trace()

        if self.kd_without_label or "annotations" not in dataset_dict:
            image, transforms = T.apply_transform_gens(
                ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens,
                image)
        else:
            # Crop around an instance if there are instances in the image.
            # USER: Remove if you don't use cropping
            if self.crop_gen:
                crop_tfm = utils.gen_crop_transform_with_instance(
                    self.crop_gen.get_crop_size(image.shape[:2]),
                    image.shape[:2],
                    np.random.choice(dataset_dict["annotations"]),
                )
                image = crop_tfm.apply_image(image)
            image, transforms = T.apply_transform_gens(self.tfm_gens, image)
            if self.crop_gen:
                transforms = crop_tfm + transforms

        image_shape = image.shape[:2]  # h, w

        # apply imgaug
        if self.is_train and self.imgaug_prob < 1.0:
            image = arguementation(image, self.imgaug_prob)

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(
            image.transpose(2, 0, 1).astype("float32"))
        # Can use uint8 if it turns out to be slow some day

        # USER: Remove if you don't use pre-computed proposals.
        if self.load_proposals:
            utils.transform_proposals(dataset_dict, image_shape, transforms,
                                      self.min_box_side_len,
                                      self.proposal_topk)

        if not self.is_train and not self.eval_with_gt:
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict and not self.kd_without_label:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.mask_on:
                    anno.pop("segmentation", None)
                if not self.keypoint_on:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices)
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(
                annos, image_shape, mask_format=self.mask_format)
            # Create a tight bounding box from masks, useful when image is cropped
            if self.crop_gen and instances.has("gt_masks"):
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(
                instances, box_threshold=self.BOX_MINSIZE)

        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            with PathManager.open(dataset_dict.pop("sem_seg_file_name"),
                                  "rb") as f:
                sem_seg_gt = Image.open(f)
                sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
            sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
            sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
            dataset_dict["sem_seg"] = sem_seg_gt
        return dataset_dict
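
The helper `arguementation` is not defined anywhere in this listing. A numpy-only stand-in that applies a brightness jitter with the given probability might look like the sketch below; this is an assumption about the helper's behavior, not the original implementation:

import numpy as np

def arguementation(image, prob):
    # Hypothetical stand-in: with probability `prob`, scale brightness by a
    # random factor in [0.8, 1.2]; otherwise return the image untouched.
    if np.random.rand() < prob:
        factor = np.random.uniform(0.8, 1.2)
        image = np.clip(image.astype("float32") * factor, 0, 255).astype("uint8")
    return image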
Example #13
    def __call__(self, dataset_dict):
        """
        Transform the dataset_dict according to the configured transformations.

        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a new dict that's going to be processed by the model.
                It currently does the following:
                1. Read the image from "file_name"
                2. Transform the image and annotations
                3. Prepare the annotations to :class:`Instances`
        """
        # get the 3D model for each annotation, and remove 3D mesh models from the image dict
        mesh_models = []
        if "annotations" in dataset_dict:
            for anno in dataset_dict["annotations"]:
                mesh_models.append(
                    [
                        self._all_mesh_models[anno["mesh"]][0].clone(),
                        self._all_mesh_models[anno["mesh"]][1].clone(),
                    ]
                )

        dataset_dict = {key: value for key, value in dataset_dict.items() if key != "mesh_models"}
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        if "annotations" in dataset_dict:
            for i, anno in enumerate(dataset_dict["annotations"]):
                anno["mesh"] = mesh_models[i]

        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        image, transforms = T.apply_transform_gens(self.tfm_gens, image)

        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
        # Can use uint8 if it turns out to be slow some day

        # USER: Remove if you don't use pre-computed proposals.
        if self.load_proposals:
            utils.transform_proposals(
                dataset_dict, image_shape, transforms, self.min_box_side_len, self.proposal_topk
            )

        if not self.is_train:
            dataset_dict.pop("annotations", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            annos = [
                self.transform_annotations(obj, transforms, image_shape)
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            # Should not be empty during training
            instances = annotations_to_instances(annos, image_shape)
            dataset_dict["instances"] = instances[instances.gt_boxes.nonempty()]

        return dataset_dict
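
Example #13 deliberately pops the heavy mesh tensors out of the dict before `copy.deepcopy` and reattaches per-annotation clones afterwards, so each dataloader worker deep-copies only the lightweight metadata. The same pattern in isolation (the `heavy_key` name is hypothetical):

import copy

def deepcopy_without_heavy(dataset_dict, heavy_key="mesh_models"):
    # Keep a reference to the heavy object, deepcopy only the light part,
    # then reattach the original (or cheap clones) to the fresh copy.
    heavy = dataset_dict.get(heavy_key)
    light = {k: v for k, v in dataset_dict.items() if k != heavy_key}
    out = copy.deepcopy(light)
    if heavy is not None:
        out[heavy_key] = heavy
    return out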
Example #14
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        image = utils.read_image(dataset_dict["file_name"],
                                 format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        original_image = image

        if self.crop_gen is None or np.random.rand() > 0.5:
            tfm_gens = self.tfm_gens
        else:
            tfm_gens = self.tfm_gens[:-1] + self.crop_gen + self.tfm_gens[-1:]

        if "sem_seg_file_name" in dataset_dict:
            sem_seg_gt = utils.read_image(
                dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
            dataset_dict["sem_seg"] = torch.as_tensor(
                sem_seg_gt.astype("long"))
        else:
            sem_seg_gt = None

        aug_input = T.StandardAugInput(original_image, sem_seg=sem_seg_gt)
        transforms = aug_input.apply_augmentations(tfm_gens)
        image, sem_seg_gt = aug_input.image, aug_input.sem_seg
        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)))

        if self.proposal_topk is not None:
            utils.transform_proposals(dataset_dict,
                                      image_shape,
                                      transforms,
                                      proposal_topk=self.proposal_topk)

        if not self.is_train:
            # USER: Modify this if you want to keep them for some reason.
            return dataset_dict

        # record which geometric transform came first: 0 = no flip,
        # 1 = horizontal flip, 2 = any other transform
        if type(transforms[0]) is FT.NoOpTransform:
            flip = 0
        elif type(transforms[0]) is FT.HFlipTransform:
            flip = 1
        else:
            flip = 2
        dataset_dict["flip"] = flip

        if sem_seg_gt is not None:
            sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
            if self.sem_seg_unlabeled_region_on:
                sem_seg_gt[sem_seg_gt ==
                           self.ignore_value] = self.num_sem_seg_classes
            dataset_dict["sem_seg"] = sem_seg_gt

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.mask_on:
                    anno.pop("segmentation", None)
                anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(obj, transforms,
                                                     image_shape)
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(annos, image_shape)
            dataset_dict["instances"] = utils.filter_empty_instances(instances)

            if self.unseen_label_set is not None:
                dataset_dict["instances"] = filter_unseen_class(
                    dataset_dict["instances"], self.unseen_label_set)

        if self.unlabeled_region_on:
            if self.sem_seg_unlabeled_region_on:
                cum_sem_seg = cum_map(dataset_dict["sem_seg"],
                                      self.num_sem_seg_classes)
            else:
                cum_sem_seg = cum_map(dataset_dict["sem_seg"],
                                      self.ignore_value)
            dataset_dict["integral_sem_seg"] = cum_sem_seg

        return dataset_dict
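
The helper `cum_map` is undefined in this listing; the name `integral_sem_seg` suggests a per-class integral image (summed-area table) over the semantic map, which lets any box's per-class pixel counts be read off in constant time. A sketch of what such a helper might compute, assuming the second argument bounds the valid class labels (this is a guess about the undocumented helper, not its real implementation):

import torch
import torch.nn.functional as F

def cum_map(sem_seg, num_classes):
    # Hypothetical: one-hot encode the (H, W) label map, zero out ignored
    # pixels, then take cumulative sums over both spatial axes to obtain
    # an (H, W, C) integral image.
    ignore = sem_seg >= num_classes
    labels = sem_seg.clamp(max=num_classes - 1).long()
    one_hot = F.one_hot(labels, num_classes).float()
    one_hot[ignore] = 0.0
    return one_hot.cumsum(dim=0).cumsum(dim=1)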
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file

        image = utils.read_image(dataset_dict["file_name"], format=self.image_format)
        utils.check_image_size(dataset_dict, image)
        image = transform(image=image)["image"]

        ############################################################################

        '''
        image = utils.read_image(dataset_dict["file_name"], format=self.image_format)

        
        h, w, _ = image.shape
        utils.check_image_size(dataset_dict, image)
        
        bboxes = [ann["bbox"] for ann in dataset_dict['annotations']]
        labels = [ann['category_id'] for ann in dataset_dict['annotations']]
        class_labels = [CLASSES[label] for label in labels]

        segmentations = [ann["segmentation"] for ann in dataset_dict['annotations']]
        #cprint("before :" , segmentations)
        masks = convert_coco_poly_to_mask(segmentations, h, w)
        masks = [mask.numpy() for mask in masks]      
        
        transformed = transform(image=image, bboxes=bboxes, class_labels=class_labels, masks=masks)

        image = transformed["image"]
        bboxes = transformed["bboxes"]

        class_labels = transformed["class_labels"]
        labels = [CLASSES.index(cl) for cl in class_labels]

        
        filtered_masks = []
        
        
        for mask in transformed["masks"]:
            # if len(np.unique(mask)) > 1:
            filtered_masks.append(mask)

        
        if len(bboxes) != len(filtered_masks):
            print(len(bboxes), len(filtered_masks), len(labels))
        
        #print(len(bboxes), len(masks), len(labels))
        seg_masks = [binary_mask_to_polygon(mask, tolerance=2) for mask in masks]
        
        for idx in range(len(labels)):
            dataset_dict['annotations'][idx]["bbox"] = bboxes[idx]
            dataset_dict['annotations'][idx]["labels"] = labels[idx]
            dataset_dict['annotations'][idx]["segmentation"] = seg_masks[idx]
        
        dataset_dict['annotations'] = dataset_dict['annotations'][:len(labels)]
        '''
        
        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
        else:
            sem_seg_gt = None

        aug_input = T.StandardAugInput(image, sem_seg=sem_seg_gt)
        transforms = aug_input.apply_augmentations(self.augmentations)
        image, sem_seg_gt = aug_input.image, aug_input.sem_seg

        image_shape = image.shape[:2]  # h, w
        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
        if sem_seg_gt is not None:
            dataset_dict["sem_seg"] = torch.as_tensor(sem_seg_gt.astype("long"))

        # USER: Remove if you don't use pre-computed proposals.
        # Most users would not need this feature.
        if self.proposal_topk is not None:
            utils.transform_proposals(
                dataset_dict, image_shape, transforms, proposal_topk=self.proposal_topk
            )

        if not self.is_train:
            # USER: Modify this if you want to keep them for some reason.
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.use_instance_mask:
                    anno.pop("segmentation", None)
                if not self.use_keypoint:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(
                    obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
                )
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(
                annos, image_shape, mask_format=self.instance_mask_format
            )

            # After transforms such as cropping are applied, the bounding box may no longer
            # tightly bound the object. As an example, imagine a triangle object
            # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
            # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
            # the intersection of original bounding box and the cropping box.
            if self.recompute_boxes:
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)
        return dataset_dict
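
The triangle comment above is why `recompute_boxes` regenerates `gt_boxes` from `gt_masks` instead of clipping the original boxes. In isolation, detectron2's BitMasks already provides the tight-box computation; a small sketch (the mask values are illustrative):

import torch
from detectron2.structures import BitMasks

# Two tiny 5x5 binary masks; get_bounding_boxes() returns the tight XYXY
# box around the nonzero pixels of each mask.
masks = torch.zeros(2, 5, 5, dtype=torch.bool)
masks[0, 1:3, 1:4] = True   # rows 1-2, cols 1-3
masks[1, 0:2, 3:5] = True   # rows 0-1, cols 3-4
print(BitMasks(masks).get_bounding_boxes().tensor)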
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file

        category = dataset_dict["annotations"][0]['category_id']

        try:
            image = utils.read_image(dataset_dict["file_name"],
                                     format=self.image_format)
        except Exception as e:
            print(dataset_dict["file_name"])
            print(e)
            raise e
        try:
            utils.check_image_size(dataset_dict, image)
        except SizeMismatchError as e:
            expected_wh = (dataset_dict["width"], dataset_dict["height"])
            image_wh = (image.shape[1], image.shape[0])
            if (image_wh[1], image_wh[0]) == expected_wh:
                print("transposing image {}".format(dataset_dict["file_name"]))
                image = image.transpose(1, 0, 2)
            else:
                raise e

        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            sem_seg_gt = utils.read_image(
                dataset_dict.pop("sem_seg_file_name"), "L").squeeze(2)
        else:
            sem_seg_gt = None

        if (int(category) != 5):
            boxes = np.asarray([
                BoxMode.convert(instance["bbox"], instance["bbox_mode"],
                                BoxMode.XYXY_ABS)
                for instance in dataset_dict["annotations"]
            ])
            aug_input = T.StandardAugInput(image,
                                           boxes=boxes,
                                           sem_seg=sem_seg_gt)
            transforms = aug_input.apply_augmentations(self.augmentation)
            image, sem_seg_gt = aug_input.image, aug_input.sem_seg
        else:
            boxes = np.asarray([0])

        # aug_input = T.StandardAugInput(image, boxes=boxes, sem_seg=sem_seg_gt)
        # transforms = aug_input.apply_augmentations(self.augmentation)
        # image, sem_seg_gt = aug_input.image, aug_input.sem_seg

        image_shape = image.shape[:2]  # h, w
        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)))

        if sem_seg_gt is not None:
            dataset_dict["sem_seg"] = torch.as_tensor(
                sem_seg_gt.astype("long"))

        # USER: Remove if you don't use pre-computed proposals.
        # Most users would not need this feature.

        if self.proposal_topk and int(category) != 5:
            utils.transform_proposals(
                dataset_dict,
                image_shape,
                transforms,
                proposal_topk=self.proposal_topk,
                min_box_size=self.proposal_min_box_size,
            )

        if not self.is_train:
            # lane images (category 5) keep their annotations even at
            # evaluation time; other images follow the standard inference
            # path and return early
            if (int(category) != 5):
                dataset_dict.pop("annotations", None)
                dataset_dict.pop("sem_seg_file_name", None)
                dataset_dict.pop("pano_seg_file_name", None)
                return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.use_instance_mask:
                    anno.pop("segmentation", None)
                if not self.use_keypoint:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            if (int(category) != 5):
                annos = [
                    transform_instance_annotations(
                        obj,
                        transforms,
                        image_shape,
                        keypoint_hflip_indices=self.keypoint_hflip_indices,
                    ) for obj in dataset_dict.pop("annotations")
                    if obj.get("iscrowd", 0) == 0
                ]
            segment_transform = transf.Compose([
                myTransform.FreeScaleMask((60, 100)),
                myTransform.MaskToTensor(),
            ])
            img_transform = transf.Compose([
                transf.Resize((288, 800)),
                transf.ToTensor(),
                transf.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
            ])
            use_ax = self.is_train

            if (int(category) != 5):
                # dataset_dict['seg_label'] = torch.zeros([36,100,3])
                # dataset_dict['cls_label'] = [[-1 for _ in range(4)] for _ in range(18)]
                instances = annotations_to_instances(
                    annos, image_shape, mask_format=self.instance_mask_format)
                if self.recompute_boxes:
                    instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
                dataset_dict["instances"] = utils.filter_empty_instances(
                    instances)

            else:
                cl = LaneClsDataset(
                    '/home/ghr/hdd/traffic_sign/only_lane/images/CULANE_288',
                    img_path=dataset_dict['file_name'],
                    row_anchor=culane_row_anchor,
                    seg_path=dataset_dict['annotations'][0]['lanefilepath'],
                    segment_transform=segment_transform,
                    use_aux=use_ax)
                if use_ax:
                    img, cls, seg = cl.get_item()
                else:
                    img, cls = cl.get_item()
                    seg = 0
                # import pdb; pdb.set_trace()
                # dataset_dict["image"] = img
                dataset_dict['seg_label'] = seg
                dataset_dict['cls_label'] = cls
                #instances = annotations_to_instances(dataset_dict['annotations'], image_shape, mask_format=self.instance_mask_format)
            # The LaneClsDataset call above returns the lane labels.

            # After transforms such as cropping are applied, the bounding box may no longer
            # tightly bound the object. As an example, imagine a triangle object
            # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
            # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
            # the intersection of original bounding box and the cropping box.
            # if self.recompute_boxes:
            #     instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            # dataset_dict["instances"] = utils.filter_empty_instances(instances)

        if self.basis_loss_on and self.is_train:
            # load basis supervisions
            if self.ann_set == "coco":
                basis_sem_path = (dataset_dict["file_name"].replace(
                    "train2017",
                    "thing_train2017").replace("image/train", "thing_train"))
            else:
                basis_sem_path = (dataset_dict["file_name"].replace(
                    "coco", "lvis").replace("train2017", "thing_train"))
            # change extension to npz

            basis_sem_path = osp.splitext(basis_sem_path)[0] + ".npz"
            basis_sem_gt = np.load(basis_sem_path)["mask"]
            basis_sem_gt = transforms.apply_segmentation(basis_sem_gt)
            basis_sem_gt = torch.as_tensor(basis_sem_gt.astype("long"))
            dataset_dict["basis_sem"] = basis_sem_gt

        return dataset_dict
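
The basis-supervision path above is derived purely by string substitution from the image path plus an extension swap to .npz. For the "coco" setting the derivation behaves as follows (the input path is illustrative, not from the source):

import os.path as osp

file_name = "datasets/coco/train2017/000000000009.jpg"  # illustrative
basis_sem_path = (file_name.replace("train2017", "thing_train2017")
                           .replace("image/train", "thing_train"))
basis_sem_path = osp.splitext(basis_sem_path)[0] + ".npz"
print(basis_sem_path)  # datasets/coco/thing_train2017/000000000009.npz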
Example #17
    def _original_call(self, dataset_dict):
        """
        Modified from detectron2's original __call__ in DatasetMapper
        """
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below

        image = self._read_image(dataset_dict, format=self.img_format)
        if not self.backfill_size:
            utils.check_image_size(dataset_dict, image)

        image, dataset_dict = self._custom_transform(image, dataset_dict)

        inputs = AugInput(image=image)
        if "annotations" not in dataset_dict:
            transforms = AugmentationList(
                ([self.crop_gen] if self.crop_gen else []) +
                self.tfm_gens)(inputs)
            image = inputs.image
        else:
            # pass additional arguments, will only be used when the Augmentation
            #   takes `annotations` as input
            inputs.annotations = dataset_dict["annotations"]
            # Crop around an instance if there are instances in the image.
            if self.crop_gen:
                crop_tfm = utils.gen_crop_transform_with_instance(
                    self.crop_gen.get_crop_size(image.shape[:2]),
                    image.shape[:2],
                    np.random.choice(dataset_dict["annotations"]),
                )
                inputs.image = crop_tfm.apply_image(image)
            transforms = AugmentationList(self.tfm_gens)(inputs)
            image = inputs.image
            if self.crop_gen:
                transforms = crop_tfm + transforms

        # Cache identical transforms in dataset_dict for subclass mappers
        # TODO T122215878 Find more explicit way to expose transforms used
        dataset_dict["transforms"] = transforms

        image_shape = image.shape[:2]  # h, w
        if image.ndim == 2:
            image = np.expand_dims(image, 2)
        dataset_dict["image"] = torch.as_tensor(
            image.transpose(2, 0, 1).astype("float32"))
        # Can use uint8 if it turns out to be slow some day

        if self.load_proposals:
            utils.transform_proposals(
                dataset_dict,
                image_shape,
                transforms,
                proposal_topk=self.proposal_topk,
                min_box_size=self.proposal_min_box_size,
            )

        if not self.is_train:
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            for anno in dataset_dict["annotations"]:
                if not self.mask_on:
                    anno.pop("segmentation", None)
                if not self.keypoint_on:
                    anno.pop("keypoints", None)

            annos = [
                utils.transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices,
                ) for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(
                annos, image_shape, mask_format=self.mask_format)
            # Create a tight bounding box from masks, useful when image is cropped
            if self.crop_gen and instances.has("gt_masks"):
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)

        if "sem_seg_file_name" in dataset_dict:
            sem_seg_gt = read_sem_seg_file_with_prefetch(
                dataset_dict.pop("sem_seg_file_name"),
                prefetched=dataset_dict.get(PREFETCHED_SEM_SEG_FILE_NAME,
                                            None),
            )
            if len(sem_seg_gt.shape) > 2:
                sem_seg_gt = sem_seg_gt.squeeze(2)
            sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
            sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
            dataset_dict["sem_seg"] = sem_seg_gt

        # extend standard D2 semantic segmentation to support multiple segmentation
        # files, each file can represent a class
        if "multi_sem_seg_file_names" in dataset_dict:
            raise NotImplementedError()

        if "_post_process_" in dataset_dict:
            proc_func = dataset_dict.pop("_post_process_")
            dataset_dict = proc_func(dataset_dict)

        return dataset_dict
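
Example #17 drives augmentation through detectron2's AugInput / AugmentationList API: the input object is mutated in place, and the returned TransformList can later be replayed on other data such as segmentation masks. A minimal usage sketch (the flip op and input are illustrative):

import numpy as np
from detectron2.data import transforms as T

image = np.zeros((480, 640, 3), dtype=np.uint8)  # illustrative input
aug_input = T.AugInput(image)
transforms = T.AugmentationList([T.RandomFlip(prob=0.5)])(aug_input)
augmented = aug_input.image  # modified in place by the augmentations
# `transforms` can now be replayed, e.g. transforms.apply_segmentation(mask)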
Example #18
    def __call__(self, dataset_dicts):
        """
        Args:
            dataset_dicts (list[dict]): Metadata of one batch of frames,
            each frame in Detectron2 Dataset format.

        Returns:
            list[dict]: a batch of N input frames, each in a format
            that builtin models in detectron2 accept
        """
        output_dicts = []

        transforms = None
        dataset_dicts = copy.deepcopy(
            dataset_dicts)  # it will be modified by code below
        for frame_dict in dataset_dicts:
            image = read_image(frame_dict["file_name"], format=self.img_format)
            utils.check_image_size(frame_dict, image)

            # First, generate the TransformList for the first image (it has random components!).
            # Then, apply the same transformations to the next images.
            # This way, we are applying the same transformation to the whole batch.
            if transforms is None:
                image, transforms = T.apply_transform_gens(
                    self.tfm_gens, image)
            else:
                image = transforms.apply_image(image)

            image_shape = image.shape[:2]  # h, w

            # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
            # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
            # Therefore it's important to use torch.Tensor.
            frame_dict["image"] = torch.as_tensor(
                np.ascontiguousarray(image.transpose(2, 0, 1)))

            # USER: Remove if you don't use pre-computed proposals.
            if self.load_proposals:
                utils.transform_proposals(frame_dict, image_shape, transforms,
                                          self.min_box_side_len,
                                          self.proposal_topk)

            if not self.is_train:
                # USER: Modify this if you want to keep them for some reason.
                frame_dict.pop("annotations", None)
                frame_dict.pop("sem_seg_file_name", None)
                # Keep collecting frames so the documented contract
                # (list[dict]) also holds at inference time.
                output_dicts.append(frame_dict)
                continue

            if "annotations" in frame_dict:
                # USER: Modify this if you want to keep them for some reason.
                for anno in frame_dict["annotations"]:
                    anno.pop("segmentation", None)
                    anno.pop("keypoints", None)

                # USER: Implement additional transformations if you have other types of data
                annos = [
                    utils.transform_instance_annotations(
                        obj, transforms, image_shape)
                    for obj in frame_dict.pop("annotations")
                    if obj.get("iscrowd", 0) == 0
                ]
                instances = annotations_to_instances(annos, image_shape)

                frame_dict["instances"] = utils.filter_empty_instances(
                    instances)

            output_dicts.append(frame_dict)
        return output_dicts
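
The batch-consistency trick in Example #18 generalizes: capture the TransformList produced for the first frame, then replay `apply_image` on every other frame so the whole clip receives identical random decisions. A compact sketch of the same idea outside the mapper (the frames and the flip op are illustrative):

import numpy as np
from detectron2.data import transforms as T

frames = [np.zeros((240, 320, 3), dtype=np.uint8) for _ in range(4)]
first, transforms = T.apply_transform_gens([T.RandomFlip(prob=0.5)], frames[0])
batch = [first] + [transforms.apply_image(f) for f in frames[1:]]
# every frame in `batch` saw the same flip decision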