Code Example #1
    def _datapipe(
            self,
            resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
        images_dp, meta_dp = resource_dps

        if self._annotations is None:
            dp = hint_shuffling(images_dp)
            dp = hint_sharding(dp)
            return Mapper(dp, self._prepare_image)

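        # Parse the JSON annotation file and split its top-level entries into image and annotation metadata.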
        meta_dp = Filter(meta_dp, self._filter_meta_files)
        meta_dp = JsonParser(meta_dp)
        meta_dp = Mapper(meta_dp, getitem(1))
        meta_dp: IterDataPipe[Dict[str, Dict[str, Any]]] = MappingIterator(meta_dp)
        images_meta_dp, anns_meta_dp = Demultiplexer(
            meta_dp,
            2,
            self._classify_meta,
            drop_none=True,
            buffer_size=INFINITE_BUFFER_SIZE,
        )

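        # Unbatch the per-key lists and group the annotations by the image they belong to.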
        images_meta_dp = Mapper(images_meta_dp, getitem(1))
        images_meta_dp = UnBatcher(images_meta_dp)

        anns_meta_dp = Mapper(anns_meta_dp, getitem(1))
        anns_meta_dp = UnBatcher(anns_meta_dp)
        anns_meta_dp = Grouper(anns_meta_dp,
                               group_key_fn=getitem("image_id"),
                               buffer_size=INFINITE_BUFFER_SIZE)
        anns_meta_dp = hint_shuffling(anns_meta_dp)
        anns_meta_dp = hint_sharding(anns_meta_dp)

        anns_dp = IterKeyZipper(
            anns_meta_dp,
            images_meta_dp,
            key_fn=getitem(0, "image_id"),
            ref_key_fn=getitem("id"),
            buffer_size=INFINITE_BUFFER_SIZE,
        )
        dp = IterKeyZipper(
            anns_dp,
            images_dp,
            key_fn=getitem(1, "file_name"),
            ref_key_fn=path_accessor("name"),
            buffer_size=INFINITE_BUFFER_SIZE,
        )
        return Mapper(dp, self._prepare_sample)
Code Example #2
    def _make_datapipe(
        self,
        resource_dps: List[IterDataPipe],
        *,
        config: DatasetConfig,
    ) -> IterDataPipe[Dict[str, Any]]:
        archive_dp, extra_split_dp = resource_dps

        split_dp, images_dp, anns_dp = Demultiplexer(
            archive_dp,
            3,
            self._classify_archive,
            buffer_size=INFINITE_BUFFER_SIZE,
            drop_none=True,
        )
        if config.split == "train_noval":
            split_dp = extra_split_dp

        split_dp = Filter(split_dp,
                          path_comparator("name", f"{config.split}.txt"))
        split_dp = LineReader(split_dp, decode=True)
        split_dp = hint_sharding(split_dp)
        split_dp = hint_shuffling(split_dp)

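        # Zip the split ids first with the images and then with the annotations, matching on the file stem at each level.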
        dp = split_dp
        for level, data_dp in enumerate((images_dp, anns_dp)):
            dp = IterKeyZipper(
                dp,
                data_dp,
                key_fn=getitem(*[0] * level, 1),
                ref_key_fn=path_accessor("stem"),
                buffer_size=INFINITE_BUFFER_SIZE,
            )
        return Mapper(dp, self._prepare_sample)
Code Example #3
File: voc.py Project: fizyr-forks/torchvision
    def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
        archive_dp = resource_dps[0]
        split_dp, images_dp, anns_dp = Demultiplexer(
            archive_dp,
            3,
            self._classify_archive,
            drop_none=True,
            buffer_size=INFINITE_BUFFER_SIZE,
        )

        split_dp = Filter(split_dp, functools.partial(self._is_in_folder, name=self._split_folder))
        split_dp = Filter(split_dp, path_comparator("name", f"{self._split}.txt"))
        split_dp = LineReader(split_dp, decode=True)
        split_dp = hint_shuffling(split_dp)
        split_dp = hint_sharding(split_dp)

        dp = split_dp
        for level, data_dp in enumerate((images_dp, anns_dp)):
            dp = IterKeyZipper(
                dp,
                data_dp,
                key_fn=getitem(*[0] * level, 1),
                ref_key_fn=path_accessor("stem"),
                buffer_size=INFINITE_BUFFER_SIZE,
            )
        return Mapper(dp, self._prepare_sample)
Code Example #4
File: svhn.py Project: fizyr-forks/torchvision
 def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
     dp = resource_dps[0]
     dp = Mapper(dp, self._read_images_and_labels)
     dp = UnBatcher(dp)
     dp = hint_shuffling(dp)
     dp = hint_sharding(dp)
     return Mapper(dp, self._prepare_sample)
Code Example #5
File: eurosat.py Project: fizyr-forks/torchvision
 def _datapipe(
         self,
         resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
     dp = resource_dps[0]
     dp = hint_shuffling(dp)
     dp = hint_sharding(dp)
     return Mapper(dp, self._prepare_sample)
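Note: every example on this page routes its pipe through hint_shuffling and hint_sharding. In torchvision's prototype datasets these are thin helpers that insert an inactive Shuffler and a ShardingFilter as placeholders, so the data loader can turn shuffling on and pick a sharding point later without rebuilding the pipeline. A simplified sketch of what they do (the actual source may differ in details):

from torch.utils.data.datapipes.iter import ShardingFilter, Shuffler

def hint_shuffling(datapipe):
    # The Shuffler is inserted disabled; a loader can re-enable it later via
    # set_shuffle(True). (torchvision passes its INFINITE_BUFFER_SIZE sentinel
    # as the buffer size; a plain default is used in this sketch.)
    return Shuffler(datapipe, buffer_size=10_000).set_shuffle(False)

def hint_sharding(datapipe):
    # Marks the point at which the stream may be split across worker processes.
    return ShardingFilter(datapipe)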
Code Example #6
File: gtsrb.py Project: vballoli/vision
    def _make_datapipe(
        self,
        resource_dps: List[IterDataPipe],
        *,
        config: DatasetConfig,
        decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
    ) -> IterDataPipe[Dict[str, Any]]:

        if config.split == "train":
            images_dp, ann_dp = Demultiplexer(resource_dps[0],
                                              2,
                                              self._classify_train_archive,
                                              drop_none=True,
                                              buffer_size=INFINITE_BUFFER_SIZE)
        else:
            images_dp, ann_dp = resource_dps
            images_dp = Filter(images_dp, path_comparator("suffix", ".ppm"))

        # The order of the image files in the .zip archives perfectly matches the order of the entries in
        # the (possibly concatenated) .csv files. So we're able to use Zipper here instead of an IterKeyZipper.
        ann_dp = CSVDictParser(ann_dp, delimiter=";")
        dp = Zipper(images_dp, ann_dp)

        dp = hint_sharding(dp)
        dp = hint_shuffling(dp)

        dp = Mapper(dp, partial(self._collate_and_decode, decoder=decoder))
        return dp
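The comment in this snippet is the key design note: Zipper pairs two pipes purely by position, while IterKeyZipper buffers the reference pipe and joins on a key. A toy illustration of the difference, with made-up data and assuming torchdata is installed:

from torch.utils.data.datapipes.iter import IterableWrapper
from torchdata.datapipes.iter import IterKeyZipper

# Toy data: the annotation order does NOT match the image order.
images = IterableWrapper(["00000.ppm", "00001.ppm"])
anns = IterableWrapper([{"Filename": "00001.ppm"}, {"Filename": "00000.ppm"}])

# IterKeyZipper joins on a key, so the mismatched order is harmless.
paired = IterKeyZipper(
    images,
    anns,
    key_fn=lambda path: path,
    ref_key_fn=lambda ann: ann["Filename"],
    buffer_size=10,
)
print(list(paired))
# [('00000.ppm', {'Filename': '00000.ppm'}), ('00001.ppm', {'Filename': '00001.ppm'})]

# A plain Zipper would have paired 00000.ppm with the 00001.ppm annotation; positional
# pairing is only safe when both streams are known to be in the same order, as GTSRB's are.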
Code Example #7
    def _make_datapipe(
        self,
        resource_dps: List[IterDataPipe],
        *,
        config: DatasetConfig,
        decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
    ) -> IterDataPipe[Dict[str, Any]]:
        images_dp, anns_dp = resource_dps

        images_dp = Filter(images_dp, self._is_not_background_image)
        images_dp = hint_sharding(images_dp)
        images_dp = hint_shuffling(images_dp)

        anns_dp = Filter(anns_dp, self._is_ann)

        dp = IterKeyZipper(
            images_dp,
            anns_dp,
            key_fn=self._images_key_fn,
            ref_key_fn=self._anns_key_fn,
            buffer_size=INFINITE_BUFFER_SIZE,
            keep_key=True,
        )
        return Mapper(
            dp,
            functools.partial(self._collate_and_decode_sample,
                              decoder=decoder))
Code Example #8
    def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
        archive_dp = resource_dps[0]
        images_dp, scenes_dp = Demultiplexer(
            archive_dp,
            2,
            self._classify_archive,
            drop_none=True,
            buffer_size=INFINITE_BUFFER_SIZE,
        )

        images_dp = Filter(images_dp, path_comparator("parent.name", self._split))
        images_dp = hint_shuffling(images_dp)
        images_dp = hint_sharding(images_dp)

        if self._split != "test":
            scenes_dp = Filter(scenes_dp, path_comparator("name", f"CLEVR_{self._split}_scenes.json"))
            scenes_dp = JsonParser(scenes_dp)
            scenes_dp = Mapper(scenes_dp, getitem(1, "scenes"))
            scenes_dp = UnBatcher(scenes_dp)

            dp = IterKeyZipper(
                images_dp,
                scenes_dp,
                key_fn=path_accessor("name"),
                ref_key_fn=getitem("image_filename"),
                buffer_size=INFINITE_BUFFER_SIZE,
            )
        else:
            dp = Mapper(images_dp, self._add_empty_anns)

        return Mapper(dp, self._prepare_sample)
Code Example #9
File: usps.py Project: liuf1990/vision
 def _datapipe(
         self,
         resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
     dp = Decompressor(resource_dps[0])
     dp = LineReader(dp, decode=True, return_path=False)
     dp = hint_shuffling(dp)
     dp = hint_sharding(dp)
     return Mapper(dp, self._prepare_sample)
Code Example #10
 def _make_datapipe(
     self, resource_dps: List[IterDataPipe], *, config: DatasetConfig
 ) -> IterDataPipe[Dict[str, Any]]:
     dp = resource_dps[0]
     dp = Filter(dp, path_comparator("parent.parent.name", self._SPLIT_NAME_MAPPER[config.split]))
     dp = hint_sharding(dp)
     dp = hint_shuffling(dp)
     return Mapper(dp, self._prepare_sample)
Code Example #11
 def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
     dp = resource_dps[0]
     dp = Filter(dp, self._is_data_file)
     dp = Mapper(dp, self._unpickle)
     dp = CifarFileReader(dp, labels_key=self._LABELS_KEY)
     dp = hint_shuffling(dp)
     dp = hint_sharding(dp)
     return Mapper(dp, self._prepare_sample)
Code Example #12
File: country211.py Project: liuf1990/vision
 def _datapipe(
         self,
         resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
     dp = resource_dps[0]
     dp = Filter(
         dp, path_comparator("parent.parent.name", self._split_folder_name))
     dp = hint_shuffling(dp)
     dp = hint_sharding(dp)
     return Mapper(dp, self._prepare_sample)
Code Example #13
File: imagenet.py Project: evdcush/vision
    def _make_datapipe(self, resource_dps: List[IterDataPipe], *,
                       config: DatasetConfig) -> IterDataPipe[Dict[str, Any]]:
        if config.split in {"train", "test"}:
            dp = resource_dps[0]

            # the train archive is a tar of tars
            if config.split == "train":
                dp = TarArchiveReader(dp)

            dp = hint_sharding(dp)
            dp = hint_shuffling(dp)
            dp = Mapper(
                dp, self._prepare_train_data
                if config.split == "train" else self._prepare_test_data)
        else:  # config.split == "val":
            images_dp, devkit_dp = resource_dps

            meta_dp, label_dp = Demultiplexer(devkit_dp,
                                              2,
                                              self._classifiy_devkit,
                                              drop_none=True,
                                              buffer_size=INFINITE_BUFFER_SIZE)

            meta_dp = Mapper(meta_dp, self._extract_categories_and_wnids)
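            # Materialize the (category, wnid) pairs once so the labels below can be mapped to wnids.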
            _, wnids = zip(*next(iter(meta_dp)))

            label_dp = LineReader(label_dp, decode=True, return_path=False)
            label_dp = Mapper(
                label_dp,
                functools.partial(self._imagenet_label_to_wnid, wnids=wnids))
            label_dp: IterDataPipe[Tuple[int, str]] = Enumerator(label_dp, 1)
            label_dp = hint_sharding(label_dp)
            label_dp = hint_shuffling(label_dp)

            dp = IterKeyZipper(
                label_dp,
                images_dp,
                key_fn=getitem(0),
                ref_key_fn=self._val_test_image_key,
                buffer_size=INFINITE_BUFFER_SIZE,
            )
            dp = Mapper(dp, self._prepare_val_data)

        return Mapper(dp, self._prepare_sample)
Code Example #14
    def _make_datapipe(
        self,
        resource_dps: List[IterDataPipe],
        *,
        config: DatasetConfig,
        decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
    ) -> IterDataPipe[Dict[str, Any]]:
        images_dp, devkit_dp = resource_dps

        if config.split == "train":
            # the train archive is a tar of tars
            dp = TarArchiveReader(images_dp)
            dp = hint_sharding(dp)
            dp = hint_shuffling(dp)
            dp = Mapper(dp, self._collate_train_data)
        elif config.split == "val":
            devkit_dp = Filter(
                devkit_dp,
                path_comparator("name",
                                "ILSVRC2012_validation_ground_truth.txt"))
            devkit_dp = LineReader(devkit_dp, return_path=False)
            devkit_dp = Mapper(devkit_dp, int)
            devkit_dp = Enumerator(devkit_dp, 1)
            devkit_dp = hint_sharding(devkit_dp)
            devkit_dp = hint_shuffling(devkit_dp)

            dp = IterKeyZipper(
                devkit_dp,
                images_dp,
                key_fn=getitem(0),
                ref_key_fn=self._val_test_image_key,
                buffer_size=INFINITE_BUFFER_SIZE,
            )
            dp = Mapper(dp, self._collate_val_data)
        else:  # config.split == "test"
            dp = hint_sharding(images_dp)
            dp = hint_shuffling(dp)
            dp = Mapper(dp, self._collate_test_data)

        return Mapper(
            dp,
            functools.partial(self._collate_and_decode_sample,
                              decoder=decoder))
Code Example #15
File: stanford_cars.py Project: pytorch/vision
    def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:

        images_dp, targets_dp = resource_dps
        if self._split == "train":
            targets_dp = Filter(targets_dp, path_comparator("name", "cars_train_annos.mat"))
        targets_dp = StanfordCarsLabelReader(targets_dp)
        dp = Zipper(images_dp, targets_dp)
        dp = hint_shuffling(dp)
        dp = hint_sharding(dp)
        return Mapper(dp, self._prepare_sample)
Code Example #16
 def _make_datapipe(
     self,
     resource_dps: List[IterDataPipe],
     *,
     config: DatasetConfig,
 ) -> IterDataPipe[Dict[str, Any]]:
     dp = resource_dps[0]
     dp = CSVParser(dp, delimiter=" ")
     dp = hint_shuffling(dp)
     dp = hint_sharding(dp)
     return Mapper(dp, self._prepare_sample)
Code Example #17
File: caltech.py Project: behxyz/vision
 def _make_datapipe(
     self,
     resource_dps: List[IterDataPipe],
     *,
     config: DatasetConfig,
 ) -> IterDataPipe[Dict[str, Any]]:
     dp = resource_dps[0]
     dp = Filter(dp, self._is_not_rogue_file)
     dp = hint_shuffling(dp)
     dp = hint_sharding(dp)
     return Mapper(dp, self._prepare_sample)
Code Example #18
File: pcam.py Project: fizyr-forks/torchvision
    def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:

        images_dp, targets_dp = resource_dps

        images_dp = PCAMH5Reader(images_dp, key="x")
        targets_dp = PCAMH5Reader(targets_dp, key="y")

        dp = Zipper(images_dp, targets_dp)
        dp = hint_shuffling(dp)
        dp = hint_sharding(dp)
        return Mapper(dp, self._prepare_sample)
Code Example #19
File: fer2013.py Project: yoshitomo-matsubara/vision
 def _make_datapipe(
     self,
     resource_dps: List[IterDataPipe],
     *,
     config: DatasetConfig,
     decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
 ) -> IterDataPipe[Dict[str, Any]]:
     dp = resource_dps[0]
     dp = CSVDictParser(dp)
     dp = hint_sharding(dp)
     dp = hint_shuffling(dp)
     return Mapper(dp, functools.partial(self._collate_and_decode_sample, decoder=decoder))
Code Example #20
    def _make_datapipe(
        self,
        resource_dps: List[IterDataPipe],
        *,
        config: DatasetConfig,
        decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
    ) -> IterDataPipe[Dict[str, Any]]:
        images_dp, anns_dp = resource_dps

        images_dp = Filter(images_dp, self._filter_images)

        split_and_classification_dp, segmentations_dp = Demultiplexer(
            anns_dp,
            2,
            self._classify_anns,
            drop_none=True,
            buffer_size=INFINITE_BUFFER_SIZE,
        )

        split_and_classification_dp = Filter(
            split_and_classification_dp,
            path_comparator("name", f"{config.split}.txt"))
        split_and_classification_dp = CSVDictParser(
            split_and_classification_dp,
            fieldnames=("image_id", "label", "species"),
            delimiter=" ")
        split_and_classification_dp = hint_sharding(
            split_and_classification_dp)
        split_and_classification_dp = hint_shuffling(
            split_and_classification_dp)

        segmentations_dp = Filter(segmentations_dp, self._filter_segmentations)

        anns_dp = IterKeyZipper(
            split_and_classification_dp,
            segmentations_dp,
            key_fn=getitem("image_id"),
            ref_key_fn=path_accessor("stem"),
            buffer_size=INFINITE_BUFFER_SIZE,
        )

        dp = IterKeyZipper(
            anns_dp,
            images_dp,
            key_fn=getitem(0, "image_id"),
            ref_key_fn=path_accessor("stem"),
            buffer_size=INFINITE_BUFFER_SIZE,
        )
        return Mapper(
            dp,
            functools.partial(self._collate_and_decode_sample,
                              decoder=decoder))
Code Example #21
 def _make_datapipe(
     self,
     resource_dps: List[IterDataPipe],
     *,
     config: DatasetConfig,
 ) -> IterDataPipe[Dict[str, Any]]:
     dp = resource_dps[0]
     dp = Filter(dp, functools.partial(self._is_data_file, split=config.split))
     dp = Mapper(dp, self._unpickle)
     dp = CifarFileReader(dp, labels_key=self._LABELS_KEY)
     dp = hint_sharding(dp)
     dp = hint_shuffling(dp)
     return Mapper(dp, self._prepare_sample)
Code Example #22
 def _make_datapipe(
     self,
     resource_dps: List[IterDataPipe],
     *,
     config: DatasetConfig,
     decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
 ) -> IterDataPipe[Dict[str, Any]]:
     dp = resource_dps[0]
     dp = Filter(dp, functools.partial(self._is_data_file, split=config.split))
     dp = Mapper(dp, self._unpickle)
     dp = CifarFileReader(dp, labels_key=self._LABELS_KEY)
     dp = hint_sharding(dp)
     dp = hint_shuffling(dp)
     return Mapper(dp, functools.partial(self._collate_and_decode, decoder=decoder))
Code Example #23
def from_data_folder(
    root: Union[str, pathlib.Path],
    *,
    valid_extensions: Optional[Collection[str]] = None,
    recursive: bool = True,
) -> Tuple[IterDataPipe, List[str]]:
    root = pathlib.Path(root).expanduser().resolve()
    categories = sorted(entry.name for entry in os.scandir(root) if entry.is_dir())
    masks: Union[List[str], str] = [f"*.{ext}" for ext in valid_extensions] if valid_extensions is not None else ""
    dp = FileLister(str(root), recursive=recursive, masks=masks)
    dp: IterDataPipe = Filter(dp, functools.partial(_is_not_top_level_file, root=root))
    dp = hint_sharding(dp)
    dp = hint_shuffling(dp)
    dp = FileOpener(dp, mode="rb")
    return Mapper(dp, functools.partial(_prepare_sample, root=root, categories=categories)), categories
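A hypothetical call, assuming a layout in which each immediate subfolder of root is one category (paths invented for illustration):

# ~/datasets/pets/cat/001.jpg, ~/datasets/pets/dog/002.jpg, ...
dp, categories = from_data_folder("~/datasets/pets", valid_extensions=["jpg"])
print(categories)  # ['cat', 'dog']
for sample in dp:
    ...  # one dict per file, as built by _prepare_sample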
Code Example #24
    def _make_datapipe(self, resource_dps: List[IterDataPipe], *,
                       config: DatasetConfig) -> IterDataPipe[Dict[str, Any]]:
        images_dp, labels_dp = resource_dps
        start, stop = self.start_and_stop(config)

        images_dp = Decompressor(images_dp)
        images_dp = MNISTFileReader(images_dp, start=start, stop=stop)

        labels_dp = Decompressor(labels_dp)
        labels_dp = MNISTFileReader(labels_dp, start=start, stop=stop)

        dp = Zipper(images_dp, labels_dp)
        dp = hint_shuffling(dp)
        dp = hint_sharding(dp)
        return Mapper(dp, functools.partial(self._prepare_sample,
                                            config=config))
Code Example #25
File: mnist.py Project: pytorch/vision
    def _datapipe(
            self,
            resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
        images_dp, labels_dp = resource_dps
        start, stop = self.start_and_stop()

        images_dp = Decompressor(images_dp)
        images_dp = MNISTFileReader(images_dp, start=start, stop=stop)

        labels_dp = Decompressor(labels_dp)
        labels_dp = MNISTFileReader(labels_dp, start=start, stop=stop)

        dp = Zipper(images_dp, labels_dp)
        dp = hint_shuffling(dp)
        dp = hint_sharding(dp)
        return Mapper(dp, self._prepare_sample)
Code Example #26
File: pcam.py Project: vballoli/vision
    def _make_datapipe(
        self,
        resource_dps: List[IterDataPipe],
        *,
        config: DatasetConfig,
        decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
    ) -> IterDataPipe[Dict[str, Any]]:

        images_dp, targets_dp = resource_dps

        images_dp = PCAMH5Reader(images_dp, key="x")
        targets_dp = PCAMH5Reader(targets_dp, key="y")

        dp = Zipper(images_dp, targets_dp)
        dp = hint_sharding(dp)
        dp = hint_shuffling(dp)
        return Mapper(dp, self._collate_and_decode)
Code Example #27
File: oxford_iiit_pet.py Project: pytorch/vision
    def _datapipe(
            self,
            resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
        images_dp, anns_dp = resource_dps

        images_dp = Filter(images_dp, self._filter_images)

        split_and_classification_dp, segmentations_dp = Demultiplexer(
            anns_dp,
            2,
            self._classify_anns,
            drop_none=True,
            buffer_size=INFINITE_BUFFER_SIZE,
        )

        split_and_classification_dp = Filter(
            split_and_classification_dp,
            path_comparator("name", f"{self._split}.txt"))
        split_and_classification_dp = CSVDictParser(
            split_and_classification_dp,
            fieldnames=("image_id", "label", "species"),
            delimiter=" ")
        split_and_classification_dp = hint_shuffling(
            split_and_classification_dp)
        split_and_classification_dp = hint_sharding(
            split_and_classification_dp)

        segmentations_dp = Filter(segmentations_dp, self._filter_segmentations)

        anns_dp = IterKeyZipper(
            split_and_classification_dp,
            segmentations_dp,
            key_fn=getitem("image_id"),
            ref_key_fn=path_accessor("stem"),
            buffer_size=INFINITE_BUFFER_SIZE,
        )

        dp = IterKeyZipper(
            anns_dp,
            images_dp,
            key_fn=getitem(0, "image_id"),
            ref_key_fn=path_accessor("stem"),
            buffer_size=INFINITE_BUFFER_SIZE,
        )
        return Mapper(dp, self._prepare_sample)
Code Example #28
File: gtsrb.py Project: pytorch/vision
    def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
        if self._split == "train":
            images_dp, ann_dp = Demultiplexer(
                resource_dps[0], 2, self._classify_train_archive, drop_none=True, buffer_size=INFINITE_BUFFER_SIZE
            )
        else:
            images_dp, ann_dp = resource_dps
            images_dp = Filter(images_dp, path_comparator("suffix", ".ppm"))

        # The order of the image files in the .zip archives perfectly matches the order of the entries in the
        # (possibly concatenated) .csv files. So we're able to use Zipper here instead of an IterKeyZipper.
        ann_dp = CSVDictParser(ann_dp, delimiter=";")
        dp = Zipper(images_dp, ann_dp)

        dp = hint_shuffling(dp)
        dp = hint_sharding(dp)

        return Mapper(dp, self._prepare_sample)
Code Example #29
File: celeba.py Project: yoshitomo-matsubara/vision
    def _make_datapipe(
        self,
        resource_dps: List[IterDataPipe],
        *,
        config: DatasetConfig,
        decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
    ) -> IterDataPipe[Dict[str, Any]]:
        splits_dp, images_dp, identities_dp, attributes_dp, bboxes_dp, landmarks_dp = resource_dps

        splits_dp = CelebACSVParser(splits_dp,
                                    fieldnames=("image_id", "split_id"))
        splits_dp = Filter(
            splits_dp, functools.partial(self._filter_split,
                                         split=config.split))
        splits_dp = hint_sharding(splits_dp)
        splits_dp = hint_shuffling(splits_dp)

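        # The four annotation files are row-aligned per image, so they can be zipped positionally and merged.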
        anns_dp = Zipper(*[
            CelebACSVParser(dp, fieldnames=fieldnames) for dp, fieldnames in (
                (identities_dp, ("image_id", "identity")),
                (attributes_dp, None),
                (bboxes_dp, None),
                (landmarks_dp, None),
            )
        ])
        anns_dp = Mapper(anns_dp, self._collate_anns)

        dp = IterKeyZipper(
            splits_dp,
            images_dp,
            key_fn=getitem(0),
            ref_key_fn=path_accessor("name"),
            buffer_size=INFINITE_BUFFER_SIZE,
            keep_key=True,
        )
        dp = IterKeyZipper(dp,
                           anns_dp,
                           key_fn=getitem(0),
                           buffer_size=INFINITE_BUFFER_SIZE)
        return Mapper(
            dp,
            functools.partial(self._collate_and_decode_sample,
                              decoder=decoder))
Code Example #30
File: voc.py Project: nairbv/vision
    def _make_datapipe(
        self,
        resource_dps: List[IterDataPipe],
        *,
        config: DatasetConfig,
    ) -> IterDataPipe[Dict[str, Any]]:
        archive_dp = resource_dps[0]
        split_dp, images_dp, anns_dp = Demultiplexer(
            archive_dp,
            3,
            functools.partial(self._classify_archive, config=config),
            drop_none=True,
            buffer_size=INFINITE_BUFFER_SIZE,
        )

        split_dp = Filter(
            split_dp,
            functools.partial(self._is_in_folder,
                              name=self._SPLIT_FOLDER[config.task]))
        split_dp = Filter(split_dp,
                          path_comparator("name", f"{config.split}.txt"))
        split_dp = LineReader(split_dp, decode=True)
        split_dp = hint_sharding(split_dp)
        split_dp = hint_shuffling(split_dp)

        dp = split_dp
        for level, data_dp in enumerate((images_dp, anns_dp)):
            dp = IterKeyZipper(
                dp,
                data_dp,
                key_fn=getitem(*[0] * level, 1),
                ref_key_fn=path_accessor("stem"),
                buffer_size=INFINITE_BUFFER_SIZE,
            )
        return Mapper(
            dp,
            functools.partial(
                self._prepare_sample,
                prepare_ann_fn=self._prepare_detection_ann if config.task
                == "detection" else self._prepare_segmentation_ann,
            ),
        )