Example #1
0
    def _make_datapipe(
        self,
        resource_dps: List[IterDataPipe],
        *,
        config: DatasetConfig,
        decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
    ) -> IterDataPipe[Dict[str, Any]]:
        """Build the sample datapipe from the first resource datapipe.

        The first entry of ``resource_dps`` is fanned out into three branches
        by ``self._classify_archive`` (items classified as ``None`` are
        dropped). The split branch is narrowed to the file named
        ``f"{config.split}{config.fold}.txt"``, read line by line, passed
        through ``Shuffler`` and then ``hint_sharding``. The result is zipped
        first with the space-delimited rows of the categories branch and then
        with the image branch, and every zipped item is mapped through
        ``self._collate_and_decode_sample``.

        Args:
            resource_dps: Resource datapipes; only index 0 is read.
            config: Provides ``split`` and ``fold`` for the split file name.
            decoder: Forwarded as the ``decoder`` keyword to
                ``self._collate_and_decode_sample``.

        Returns:
            A ``Mapper`` over the zipped datapipe.
        """
        split_files, category_rows, image_files = Demultiplexer(
            resource_dps[0],
            3,
            self._classify_archive,
            drop_none=True,
            buffer_size=INFINITE_BUFFER_SIZE,
        )

        # Only the split file for the requested split/fold is kept.
        wanted_split_file = f"{config.split}{config.fold}.txt"
        split_lines = Filter(split_files, path_comparator("name", wanted_split_file))
        split_lines = LineReader(split_lines, decode=True, return_path=False)
        split_lines = hint_sharding(
            Shuffler(split_lines, buffer_size=INFINITE_BUFFER_SIZE)
        )

        category_rows = CSVParser(category_rows, delimiter=" ")

        # Stage 1: pair each split line with a categories row whose first
        # column yields the matching key.
        with_categories = IterKeyZipper(
            split_lines,
            category_rows,
            key_fn=getitem(),
            ref_key_fn=getitem(0),
            buffer_size=INFINITE_BUFFER_SIZE,
        )
        # Stage 2: pair the result with an image entry, keyed through
        # self._image_key_fn on the image side.
        with_images = IterKeyZipper(
            with_categories,
            image_files,
            key_fn=getitem(0),
            ref_key_fn=self._image_key_fn,
            buffer_size=INFINITE_BUFFER_SIZE,
        )

        collate = functools.partial(self._collate_and_decode_sample, decoder=decoder)
        return Mapper(with_images, collate)
Example #2
0
 def _datapipe(
         self,
         resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
     """Turn the first resource datapipe into a datapipe of prepared samples.

     The first entry of ``resource_dps`` is parsed as space-delimited CSV,
     passed through ``hint_shuffling`` and then ``hint_sharding``, and each
     resulting row is mapped through ``self._prepare_sample``.

     Args:
         resource_dps: Resource datapipes; only index 0 is read.

     Returns:
         A ``Mapper`` applying ``self._prepare_sample`` to every row.
     """
     rows = CSVParser(resource_dps[0], delimiter=" ")
     # Shuffling hint first, sharding hint second.
     rows = hint_sharding(hint_shuffling(rows))
     return Mapper(rows, self._prepare_sample)
Example #3
0
 def _make_datapipe(
     self,
     resource_dps: List[IterDataPipe],
     *,
     config: DatasetConfig,
     decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
 ) -> IterDataPipe[Dict[str, Any]]:
     """Turn the first resource datapipe into a datapipe of decoded samples.

     The first entry of ``resource_dps`` is parsed as space-delimited CSV,
     hinted for shuffling and sharding, and each row is mapped through
     ``self._collate_and_decode_sample``.

     Args:
         resource_dps: Resource datapipes; only index 0 is read.
         config: Part of the shared signature; not read by this method.
         decoder: Forwarded as the ``decoder`` keyword to
             ``self._collate_and_decode_sample``.

     Returns:
         A ``Mapper`` applying ``self._collate_and_decode_sample`` to every
         row.
     """
     dp = resource_dps[0]
     dp = CSVParser(dp, delimiter=" ")
     # Shuffle *before* sharding: if the stream is sharded first, every
     # worker keeps seeing the same fixed subset each epoch and shuffling
     # only permutes within that subset (assuming the hint helpers wrap
     # Shuffler / ShardingFilter -- confirm against their definitions).
     dp = hint_shuffling(dp)
     dp = hint_sharding(dp)
     return Mapper(
         dp,
         functools.partial(self._collate_and_decode_sample, decoder=decoder),
     )
Example #4
0
    def _datapipe(
            self,
            resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
        """Build the sample datapipe from the first resource datapipe.

        The first entry of ``resource_dps`` is fanned out into three branches
        by ``self._classify_archive`` (items classified as ``None`` are
        dropped). The split branch is narrowed to the file named
        ``f"{self._split}{self._fold}.txt"``, read line by line and passed
        through ``hint_shuffling`` and ``hint_sharding``. The result is zipped
        first with the space-delimited rows of the categories branch and then
        with the image branch, and every zipped item is mapped through
        ``self._prepare_sample``.

        Args:
            resource_dps: Resource datapipes; only index 0 is read.

        Returns:
            A ``Mapper`` over the zipped datapipe.
        """
        split_files, category_rows, image_files = Demultiplexer(
            resource_dps[0],
            3,
            self._classify_archive,
            drop_none=True,
            buffer_size=INFINITE_BUFFER_SIZE,
        )

        # Only the split file for the configured split/fold is kept.
        wanted_split_file = f"{self._split}{self._fold}.txt"
        split_lines = Filter(split_files, path_comparator("name", wanted_split_file))
        split_lines = LineReader(split_lines, decode=True, return_path=False)
        split_lines = hint_sharding(hint_shuffling(split_lines))

        category_rows = CSVParser(category_rows, delimiter=" ")

        # Stage 1: pair each split line with a categories row whose first
        # column yields the matching key.
        with_categories = IterKeyZipper(
            split_lines,
            category_rows,
            key_fn=getitem(),
            ref_key_fn=getitem(0),
            buffer_size=INFINITE_BUFFER_SIZE,
        )
        # Stage 2: pair the result with an image entry, keyed through
        # self._image_key_fn on the image side.
        with_images = IterKeyZipper(
            with_categories,
            image_files,
            key_fn=getitem(0),
            ref_key_fn=self._image_key_fn,
            buffer_size=INFINITE_BUFFER_SIZE,
        )
        return Mapper(with_images, self._prepare_sample)
Example #5
0
    def _datapipe(
            self,
            resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
        """Wire the resource datapipes into a datapipe of prepared samples.

        When ``self._year == "2011"``, ``resource_dps`` is unpacked into an
        archive and a segmentations datapipe; the archive is fanned out into
        four branches by ``self._2011_classify_archive``. For any other year,
        ``resource_dps`` is unpacked into split, image and annotation
        datapipes directly. Both paths produce a split, an image and an
        annotation datapipe, which are joined by two ``IterKeyZipper`` stages
        and mapped through ``self._prepare_sample``.

        Args:
            resource_dps: Exactly two datapipes for year "2011", exactly
                three otherwise (enforced by tuple unpacking).

        Returns:
            A ``Mapper`` applying ``self._prepare_sample`` with the
            year-specific ``prepare_ann_fn`` bound.
        """
        ann_preparer: Callable
        if self._year == "2011":
            archive, segmentations = resource_dps
            images, split_rows, image_index_rows, bbox_rows = Demultiplexer(
                archive,
                4,
                self._2011_classify_archive,
                drop_none=True,
                buffer_size=INFINITE_BUFFER_SIZE,
            )

            # This lookup is materialised immediately, while the pipeline is
            # being assembled: first column -> last "/"-separated component
            # of the second column.
            image_index_rows = CSVParser(image_index_rows, dialect="cub200")
            file_name_by_id = {
                image_id: rel_posix_path.rsplit("/", maxsplit=1)[1]
                for image_id, rel_posix_path in image_index_rows
            }

            # Filter the split rows, take their first column and translate
            # it through the lookup built above.
            split_keys = Filter(
                CSVParser(split_rows, dialect="cub200"),
                self._2011_filter_split,
            )
            split_keys = Mapper(Mapper(split_keys, getitem(0)), file_name_by_id.get)

            # Column 0 of every bounding-box row is translated the same way.
            bbox_rows = Mapper(
                CSVParser(bbox_rows, dialect="cub200"),
                file_name_by_id.get,
                input_col=0,
            )

            annotations = IterKeyZipper(
                bbox_rows,
                segmentations,
                key_fn=getitem(0),
                ref_key_fn=self._2011_segmentation_key,
                keep_key=True,
                buffer_size=INFINITE_BUFFER_SIZE,
            )

            ann_preparer = self._2011_prepare_ann
        else:  # every other year lands here (intended: self._year == "2010")
            split_files, images, annotations = resource_dps

            split_keys = Filter(
                split_files, path_comparator("name", f"{self._split}.txt")
            )
            split_keys = LineReader(split_keys, decode=True, return_path=False)
            split_keys = Mapper(split_keys, self._2010_split_key)

            annotations = Mapper(annotations, self._2010_anns_key)

            ann_preparer = self._2010_prepare_ann

        # Shuffling hint first, sharding hint second.
        split_keys = hint_sharding(hint_shuffling(split_keys))

        with_images = IterKeyZipper(
            split_keys,
            images,
            getitem(),
            path_accessor("name"),
            buffer_size=INFINITE_BUFFER_SIZE,
        )
        with_annotations = IterKeyZipper(
            with_images,
            annotations,
            getitem(0),
            buffer_size=INFINITE_BUFFER_SIZE,
        )

        prepare = functools.partial(self._prepare_sample, prepare_ann_fn=ann_preparer)
        return Mapper(with_annotations, prepare)
Example #6
0
    def _make_datapipe(
        self,
        resource_dps: List[IterDataPipe],
        *,
        config: DatasetConfig,
        decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
    ) -> IterDataPipe[Dict[str, Any]]:
        """Wire the resource datapipes into a datapipe of decoded samples.

        When ``config.year == "2011"``, ``resource_dps`` is unpacked into an
        archive and a segmentations datapipe; the archive is fanned out into
        four branches by ``self._2011_classify_archive``. For any other year,
        ``resource_dps`` is unpacked into split, image and annotation
        datapipes directly. Both paths produce a split, an image and an
        annotation datapipe, which are joined by two ``IterKeyZipper`` stages
        and mapped through ``self._collate_and_decode_sample``.

        Args:
            resource_dps: Exactly two datapipes for year "2011", exactly
                three otherwise (enforced by tuple unpacking).
            config: Provides ``year`` (branch selection, also forwarded to
                the collate function) and ``split``.
            decoder: Forwarded as the ``decoder`` keyword to
                ``self._collate_and_decode_sample``.

        Returns:
            A ``Mapper`` applying ``self._collate_and_decode_sample`` with
            ``year`` and ``decoder`` bound.
        """
        if config.year == "2011":
            archive_dp, segmentations_dp = resource_dps
            images_dp, split_dp, image_files_dp, bounding_boxes_dp = Demultiplexer(
                archive_dp,
                4,
                self._2011_classify_archive,
                drop_none=True,
                buffer_size=INFINITE_BUFFER_SIZE,
            )

            # This lookup is materialised immediately, while the pipeline is
            # being assembled: first column -> last "/"-separated component
            # of the second column.
            image_files_dp = CSVParser(image_files_dp, dialect="cub200")
            image_files_map = {
                image_id: rel_posix_path.rsplit("/", maxsplit=1)[1]
                for image_id, rel_posix_path in image_files_dp
            }

            # Filter the split rows for the requested split, take their
            # first column and translate it through the lookup built above.
            split_dp = CSVParser(split_dp, dialect="cub200")
            split_dp = Filter(
                split_dp,
                functools.partial(self._2011_filter_split, split=config.split),
            )
            split_dp = Mapper(split_dp, getitem(0))
            split_dp = Mapper(split_dp, image_files_map.get)

            # Column 0 of every bounding-box row is translated the same way.
            bounding_boxes_dp = CSVParser(bounding_boxes_dp, dialect="cub200")
            bounding_boxes_dp = Mapper(
                bounding_boxes_dp, image_files_map.get, input_col=0
            )

            anns_dp = IterKeyZipper(
                bounding_boxes_dp,
                segmentations_dp,
                key_fn=getitem(0),
                ref_key_fn=self._2011_segmentation_key,
                keep_key=True,
                buffer_size=INFINITE_BUFFER_SIZE,
            )
        else:  # every other year lands here (intended: config.year == "2010")
            split_dp, images_dp, anns_dp = resource_dps

            split_dp = Filter(
                split_dp, path_comparator("name", f"{config.split}.txt")
            )
            split_dp = LineReader(split_dp, decode=True, return_path=False)
            split_dp = Mapper(split_dp, self._2010_split_key)

            anns_dp = Mapper(anns_dp, self._2010_anns_key)

        # Shuffle *before* sharding: if the stream is sharded first, every
        # worker keeps seeing the same fixed subset each epoch and shuffling
        # only permutes within that subset (assuming the hint helpers wrap
        # Shuffler / ShardingFilter -- confirm against their definitions).
        split_dp = hint_shuffling(split_dp)
        split_dp = hint_sharding(split_dp)

        dp = IterKeyZipper(
            split_dp,
            images_dp,
            getitem(),
            path_accessor("name"),
            buffer_size=INFINITE_BUFFER_SIZE,
        )
        dp = IterKeyZipper(
            dp,
            anns_dp,
            getitem(0),
            buffer_size=INFINITE_BUFFER_SIZE,
        )
        return Mapper(
            dp,
            functools.partial(
                self._collate_and_decode_sample,
                year=config.year,
                decoder=decoder,
            ),
        )