Example #1
    def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
        archive_dp = resource_dps[0]
        images_dp, scenes_dp = Demultiplexer(
            archive_dp,
            2,
            self._classify_archive,
            drop_none=True,
            buffer_size=INFINITE_BUFFER_SIZE,
        )

        images_dp = Filter(images_dp, path_comparator("parent.name", self._split))
        images_dp = hint_shuffling(images_dp)
        images_dp = hint_sharding(images_dp)

        if self._split != "test":
            scenes_dp = Filter(scenes_dp, path_comparator("name", f"CLEVR_{self._split}_scenes.json"))
            scenes_dp = JsonParser(scenes_dp)
            scenes_dp = Mapper(scenes_dp, getitem(1, "scenes"))
            scenes_dp = UnBatcher(scenes_dp)

            dp = IterKeyZipper(
                images_dp,
                scenes_dp,
                key_fn=path_accessor("name"),
                ref_key_fn=getitem("image_filename"),
                buffer_size=INFINITE_BUFFER_SIZE,
            )
        else:
            dp = Mapper(images_dp, self._add_empty_anns)

        return Mapper(dp, self._prepare_sample)
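
A minimal, self-contained sketch of the Demultiplexer/IterKeyZipper pattern used above, with toy in-memory data standing in for the CLEVR archive. The classifier and key functions here are illustrative assumptions, not torchvision code:

from torchdata.datapipes.iter import Demultiplexer, IterableWrapper, IterKeyZipper

# Toy "archive": (path, payload) pairs, like the tuples a tar/zip reader yields.
source = IterableWrapper([
    ("images/a.png", b"image-bytes"),
    ("scenes.json", {"image_filename": "a.png", "objects": 3}),
])

def classify(item):
    # Route each archive entry to child 0 (images) or child 1 (scenes).
    return 0 if item[0].startswith("images/") else 1

images_dp, scenes_dp = Demultiplexer(source, 2, classify, drop_none=True)

# Join the two streams on the image file name, as the datapipe above does.
dp = IterKeyZipper(
    images_dp,
    scenes_dp,
    key_fn=lambda image: image[0].rsplit("/", 1)[-1],
    ref_key_fn=lambda scene: scene[1]["image_filename"],
)
print(list(dp))  # [(('images/a.png', b'image-bytes'), ('scenes.json', {...}))]
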
Example #2
    def _make_datapipe(
        self,
        resource_dps: List[IterDataPipe],
        *,
        config: DatasetConfig,
        decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
    ) -> IterDataPipe[Dict[str, Any]]:
        images_dp, anns_dp = resource_dps

        images_dp = Filter(images_dp, self._is_not_background_image)
        images_dp = hint_sharding(images_dp)
        images_dp = hint_shuffling(images_dp)

        anns_dp = Filter(anns_dp, self._is_ann)

        dp = IterKeyZipper(
            images_dp,
            anns_dp,
            key_fn=self._images_key_fn,
            ref_key_fn=self._anns_key_fn,
            buffer_size=INFINITE_BUFFER_SIZE,
            keep_key=True,
        )
        return Mapper(dp, functools.partial(self._collate_and_decode_sample, decoder=decoder))
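
Nearly every example here routes its pipeline through hint_shuffling and hint_sharding from torchvision.prototype.datasets.utils._internal. A sketch of what they do, assuming the implementation is a thin wrapper (the constant and exact signatures are assumptions; check the torchvision source for the authoritative version):

from torchdata.datapipes.iter import IterDataPipe, ShardingFilter, Shuffler

INFINITE_BUFFER_SIZE = 1_000_000_000  # stand-in for the torchvision constant

def hint_sharding(datapipe: IterDataPipe) -> IterDataPipe:
    # Mark where the pipeline should be partitioned across DataLoader workers.
    return ShardingFilter(datapipe)

def hint_shuffling(datapipe: IterDataPipe) -> IterDataPipe:
    # Insert a Shuffler but leave it disabled; DataLoader(shuffle=True)
    # toggles it on at iteration time.
    return Shuffler(datapipe, buffer_size=INFINITE_BUFFER_SIZE).set_shuffle(False)

The idea behind the hints is that where shuffling and sharding belong is dataset-specific, but whether they actually run is decided later by the DataLoader.
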
Example #3
    def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
        archive_dp = resource_dps[0]
        split_dp, images_dp, anns_dp = Demultiplexer(
            archive_dp,
            3,
            self._classify_archive,
            drop_none=True,
            buffer_size=INFINITE_BUFFER_SIZE,
        )

        split_dp = Filter(split_dp, functools.partial(self._is_in_folder, name=self._split_folder))
        split_dp = Filter(split_dp, path_comparator("name", f"{self._split}.txt"))
        split_dp = LineReader(split_dp, decode=True)
        split_dp = hint_shuffling(split_dp)
        split_dp = hint_sharding(split_dp)

        dp = split_dp
        for level, data_dp in enumerate((images_dp, anns_dp)):
            dp = IterKeyZipper(
                dp,
                data_dp,
                key_fn=getitem(*[0] * level, 1),
                ref_key_fn=path_accessor("stem"),
                buffer_size=INFINITE_BUFFER_SIZE,
            )
        return Mapper(dp, self._prepare_sample)
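
The chained IterKeyZipper loop above (the same idiom appears in Examples #10 and #24 below) leans on getitem from torchvision.prototype.datasets.utils._internal, which is roughly this helper:

def getitem(*items):
    def wrapper(obj):
        for item in items:
            obj = obj[item]
        return obj
    return wrapper

After LineReader, split_dp yields (path, line) pairs, so at level 0 the key is getitem(1)(sample), i.e. the line holding the image id. Each IterKeyZipper wraps its output in another tuple, so at level 1 the sample is ((path, line), image) and getitem(0, 1) digs the same line back out.
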
Example #4
    def _make_datapipe(
        self,
        resource_dps: List[IterDataPipe],
        *,
        config: DatasetConfig,
        decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
    ) -> IterDataPipe[Dict[str, Any]]:
        images_dp, anns_dp = resource_dps

        images_dp = Filter(images_dp, self._filter_images)

        split_and_classification_dp, segmentations_dp = Demultiplexer(
            anns_dp,
            2,
            self._classify_anns,
            drop_none=True,
            buffer_size=INFINITE_BUFFER_SIZE,
        )

        split_and_classification_dp = Filter(
            split_and_classification_dp, path_comparator("name", f"{config.split}.txt")
        )
        split_and_classification_dp = CSVDictParser(
            split_and_classification_dp, fieldnames=("image_id", "label", "species"), delimiter=" "
        )
        split_and_classification_dp = hint_sharding(split_and_classification_dp)
        split_and_classification_dp = hint_shuffling(split_and_classification_dp)

        segmentations_dp = Filter(segmentations_dp, self._filter_segmentations)

        anns_dp = IterKeyZipper(
            split_and_classification_dp,
            segmentations_dp,
            key_fn=getitem("image_id"),
            ref_key_fn=path_accessor("stem"),
            buffer_size=INFINITE_BUFFER_SIZE,
        )

        dp = IterKeyZipper(
            anns_dp,
            images_dp,
            key_fn=getitem(0, "image_id"),
            ref_key_fn=path_accessor("stem"),
            buffer_size=INFINITE_BUFFER_SIZE,
        )
        return Mapper(dp, functools.partial(self._collate_and_decode_sample, decoder=decoder))
Example #5
    def _generate_categories(self) -> List[str]:
        resources = self._resources()

        dp = resources[1].load(self._root)
        dp = Filter(dp, self._filter_split_and_classification_anns)
        dp = Filter(dp, path_comparator("name", "trainval.txt"))
        dp = CSVDictParser(dp, fieldnames=("image_id", "label"), delimiter=" ")

        raw_categories_and_labels = {(data["image_id"].rsplit("_", 1)[0], data["label"]) for data in dp}
        raw_categories, _ = zip(
            *sorted(raw_categories_and_labels, key=lambda raw_category_and_label: int(raw_category_and_label[1]))
        )
        return [" ".join(part.title() for part in raw_category.split("_")) for raw_category in raw_categories]
Example #6
    def _make_datapipe(
        self,
        resource_dps: List[IterDataPipe],
        *,
        config: DatasetConfig,
        decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
    ) -> IterDataPipe[Dict[str, Any]]:
        archive_dp = resource_dps[0]

        splits_dp, joint_categories_dp, images_dp = Demultiplexer(
            archive_dp, 3, self._classify_archive, drop_none=True, buffer_size=INFINITE_BUFFER_SIZE
        )

        splits_dp = Filter(splits_dp, path_comparator("name", f"{config.split}{config.fold}.txt"))
        splits_dp = LineReader(splits_dp, decode=True, return_path=False)
        splits_dp = Shuffler(splits_dp, buffer_size=INFINITE_BUFFER_SIZE)
        splits_dp = hint_sharding(splits_dp)

        joint_categories_dp = CSVParser(joint_categories_dp, delimiter=" ")

        dp = IterKeyZipper(
            splits_dp,
            joint_categories_dp,
            key_fn=getitem(),
            ref_key_fn=getitem(0),
            buffer_size=INFINITE_BUFFER_SIZE,
        )
        dp = IterKeyZipper(
            dp,
            images_dp,
            key_fn=getitem(0),
            ref_key_fn=self._image_key_fn,
            buffer_size=INFINITE_BUFFER_SIZE,
        )
        return Mapper(dp, functools.partial(self._collate_and_decode_sample, decoder=decoder))
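
Note that key_fn=getitem() with no arguments degenerates to the identity function (see the getitem sketch under Example #3), which fits here because splits_dp yields bare lines once LineReader is called with return_path=False.
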
Example #7
    def _generate_categories(self) -> List[str]:
        resources = self._resources()

        dp = resources[0].load(self._root)
        dp = Filter(dp, self._is_not_background_image)

        return sorted({pathlib.Path(path).parent.name for path, _ in dp})
Example #8
File: dtd.py Project: nairbv/vision
    def _generate_categories(self, root: pathlib.Path) -> List[str]:
        resources = self.resources(self.default_config)

        dp = resources[0].load(root)
        dp = Filter(dp, self._filter_images)

        return sorted({pathlib.Path(path).parent.name for path, _ in dp})
Example #9
    def _generate_categories(self, root: pathlib.Path) -> List[str]:
        config = self.default_config
        dp = self.resources(config)[1].load(pathlib.Path(root) / self.name)
        dp = Filter(dp, self._filter_split_and_classification_anns)
        dp = Filter(dp, path_comparator("name", f"{config.split}.txt"))
        dp = CSVDictParser(dp, fieldnames=("image_id", "label"), delimiter=" ")
        raw_categories_and_labels = {(data["image_id"].rsplit("_", 1)[0], data["label"]) for data in dp}
        raw_categories, _ = zip(
            *sorted(raw_categories_and_labels, key=lambda raw_category_and_label: int(raw_category_and_label[1]))
        )
        return [" ".join(part.title() for part in raw_category.split("_")) for raw_category in raw_categories]
Example #10
    def _make_datapipe(
        self,
        resource_dps: List[IterDataPipe],
        *,
        config: DatasetConfig,
    ) -> IterDataPipe[Dict[str, Any]]:
        archive_dp, extra_split_dp = resource_dps

        split_dp, images_dp, anns_dp = Demultiplexer(
            archive_dp,
            3,
            self._classify_archive,
            buffer_size=INFINITE_BUFFER_SIZE,
            drop_none=True,
        )
        if config.split == "train_noval":
            split_dp = extra_split_dp

        split_dp = Filter(split_dp, path_comparator("name", f"{config.split}.txt"))
        split_dp = LineReader(split_dp, decode=True)
        split_dp = hint_sharding(split_dp)
        split_dp = hint_shuffling(split_dp)

        dp = split_dp
        for level, data_dp in enumerate((images_dp, anns_dp)):
            dp = IterKeyZipper(
                dp,
                data_dp,
                key_fn=getitem(*[0] * level, 1),
                ref_key_fn=path_accessor("stem"),
                buffer_size=INFINITE_BUFFER_SIZE,
            )
        return Mapper(dp, self._prepare_sample)
Example #11
    def _generate_categories(self) -> Tuple[Tuple[str, str]]:
        self._annotations = "instances"
        resources = self._resources()

        dp = resources[1].load(self._root)
        dp = Filter(dp, self._filter_meta_files)
        dp = JsonParser(dp)

        _, meta = next(iter(dp))
        # List[Tuple[super_category, id, category]]
        label_data = [cast(Tuple[str, int, str], tuple(info.values())) for info in meta["categories"]]

        # COCO actually defines 91 categories, but only 80 of them have instances. Still, the category_id refers to the
        # full set. To keep the labels dense, we fill the gaps with N/A. Note that there are only 10 gaps, so the total
        # number of categories is 90 rather than 91.
        _, ids, _ = zip(*label_data)
        missing_ids = set(range(1, max(ids) + 1)) - set(ids)
        label_data.extend([("N/A", id, "N/A") for id in missing_ids])

        # We also add a background category to be used during segmentation.
        label_data.append(("N/A", 0, "__background__"))

        super_categories, _, categories = zip(*sorted(label_data, key=lambda info: info[1]))

        return cast(Tuple[Tuple[str, str]], tuple(zip(categories, super_categories)))
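
A toy illustration of the gap filling above, shrunk from COCO's 1..90 id range down to 1..3 (the values are made up for the example):

label_data = [("animal", 1, "cat"), ("animal", 3, "dog")]
_, ids, _ = zip(*label_data)
missing_ids = set(range(1, max(ids) + 1)) - set(ids)  # {2}
label_data.extend([("N/A", id, "N/A") for id in missing_ids])
label_data.append(("N/A", 0, "__background__"))
super_categories, _, categories = zip(*sorted(label_data, key=lambda info: info[1]))
print(categories)  # ('__background__', 'cat', 'N/A', 'dog')
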
Example #12
    def _make_datapipe(
        self,
        resource_dps: List[IterDataPipe],
        *,
        config: DatasetConfig,
        decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
    ) -> IterDataPipe[Dict[str, Any]]:

        if config.split == "train":
            images_dp, ann_dp = Demultiplexer(
                resource_dps[0], 2, self._classify_train_archive, drop_none=True, buffer_size=INFINITE_BUFFER_SIZE
            )
        else:
            images_dp, ann_dp = resource_dps
            images_dp = Filter(images_dp, path_comparator("suffix", ".ppm"))

        # The order of the image files in the .zip archives perfectly matches the order of the entries in the
        # (possibly concatenated) .csv files. So we're able to use Zipper here instead of an IterKeyZipper.
        ann_dp = CSVDictParser(ann_dp, delimiter=";")
        dp = Zipper(images_dp, ann_dp)

        dp = hint_sharding(dp)
        dp = hint_shuffling(dp)

        dp = Mapper(dp, partial(self._collate_and_decode, decoder=decoder))
        return dp
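
The Zipper-vs-IterKeyZipper distinction in the comment matters: Zipper pairs the streams positionally and does no buffering, so it is only correct when both streams are already in the same order. A toy contrast (file names are made up):

from torchdata.datapipes.iter import IterableWrapper, Zipper

images = IterableWrapper(["00000.ppm", "00001.ppm"])
rows = IterableWrapper([{"Filename": "00000.ppm"}, {"Filename": "00001.ppm"}])
print(list(Zipper(images, rows)))
# [('00000.ppm', {'Filename': '00000.ppm'}), ('00001.ppm', {'Filename': '00001.ppm'})]
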
Example #13
def from_data_folder(
    root: Union[str, pathlib.Path],
    *,
    decoder: Optional[Callable[[io.IOBase], torch.Tensor]] = None,
    valid_extensions: Optional[Collection[str]] = None,
    recursive: bool = True,
) -> Tuple[IterDataPipe, List[str]]:
    root = pathlib.Path(root).expanduser().resolve()
    categories = sorted(entry.name for entry in os.scandir(root) if entry.is_dir())
    masks: Union[List[str], str] = [f"*.{ext}" for ext in valid_extensions] if valid_extensions is not None else ""
    dp = FileLister(str(root), recursive=recursive, masks=masks)
    dp = Filter(dp, functools.partial(_is_not_top_level_file, root=root))
    dp = hint_sharding(dp)
    dp = Shuffler(dp, buffer_size=INFINITE_BUFFER_SIZE)
    dp = FileOpener(dp, mode="rb")
    return (
        Mapper(dp, functools.partial(_collate_and_decode_data, root=root, categories=categories, decoder=decoder)),
        categories,
    )
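
Hypothetical usage, assuming an ImageFolder-style layout root/<category>/<image> (the path and extensions below are made up):

dp, categories = from_data_folder(
    "~/datasets/pets",
    valid_extensions=["jpg", "png"],
)
print(categories)  # e.g. ['cat', 'dog'], the sorted top-level folder names
for sample in dp:  # each sample is produced by _collate_and_decode_data
    ...
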
Example #14
    def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
        dp = resource_dps[0]
        dp = Filter(dp, self._is_data_file)
        dp = Mapper(dp, self._unpickle)
        dp = CifarFileReader(dp, labels_key=self._LABELS_KEY)
        dp = hint_shuffling(dp)
        dp = hint_sharding(dp)
        return Mapper(dp, self._prepare_sample)
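
The _unpickle step is presumably the standard CIFAR batch loader; a sketch under that assumption:

import pickle

def _unpickle(data):
    _path, file = data  # upstream datapipes yield (path, file) pairs
    return pickle.load(file, encoding="latin1")
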
Example #15
    def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
        dp = resource_dps[0]
        dp = Filter(dp, self._is_not_rogue_file)
        dp = hint_shuffling(dp)
        dp = hint_sharding(dp)
        return Mapper(dp, self._prepare_sample)
Example #16
    def _generate_categories(self) -> List[str]:
        resources = self._resources()

        devkit_dp = resources[1].load(self._root)
        meta_dp = Filter(devkit_dp, path_comparator("name", "cars_meta.mat"))
        _, meta_file = next(iter(meta_dp))

        return list(read_mat(meta_file, squeeze_me=True)["class_names"])
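
read_mat comes from torchvision.prototype.datasets.utils._internal; to the best of my knowledge it is a thin wrapper over scipy (squeeze_me is a scipy.io.loadmat keyword), roughly:

import scipy.io

def read_mat(buffer, **kwargs):
    return scipy.io.loadmat(buffer, **kwargs)
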
Example #17
    def _generate_categories(self) -> List[str]:
        resources = self._resources()

        dp = resources[0].load(self._root)
        dp = Filter(dp, path_comparator("name", self._META_FILE_NAME))
        dp = Mapper(dp, self._unpickle)

        return cast(List[str], next(iter(dp))[self._CATEGORIES_KEY])
Example #18
    def _make_datapipe(
        self, resource_dps: List[IterDataPipe], *, config: DatasetConfig
    ) -> IterDataPipe[Dict[str, Any]]:
        dp = resource_dps[0]
        dp = Filter(dp, path_comparator("parent.parent.name", self._SPLIT_NAME_MAPPER[config.split]))
        dp = hint_sharding(dp)
        dp = hint_shuffling(dp)
        return Mapper(dp, self._prepare_sample)
Example #19
    def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
        images_dp, anns_dp = resource_dps

        images_dp = Filter(images_dp, self._filter_images)

        split_and_classification_dp, segmentations_dp = Demultiplexer(
            anns_dp,
            2,
            self._classify_anns,
            drop_none=True,
            buffer_size=INFINITE_BUFFER_SIZE,
        )

        split_and_classification_dp = Filter(
            split_and_classification_dp, path_comparator("name", f"{self._split}.txt")
        )
        split_and_classification_dp = CSVDictParser(
            split_and_classification_dp, fieldnames=("image_id", "label", "species"), delimiter=" "
        )
        split_and_classification_dp = hint_shuffling(split_and_classification_dp)
        split_and_classification_dp = hint_sharding(split_and_classification_dp)

        segmentations_dp = Filter(segmentations_dp, self._filter_segmentations)

        anns_dp = IterKeyZipper(
            split_and_classification_dp,
            segmentations_dp,
            key_fn=getitem("image_id"),
            ref_key_fn=path_accessor("stem"),
            buffer_size=INFINITE_BUFFER_SIZE,
        )

        dp = IterKeyZipper(
            anns_dp,
            images_dp,
            key_fn=getitem(0, "image_id"),
            ref_key_fn=path_accessor("stem"),
            buffer_size=INFINITE_BUFFER_SIZE,
        )
        return Mapper(dp, self._prepare_sample)
Example #20
    def _generate_categories(self, root: pathlib.Path) -> List[str]:
        config = self.info.make_config(task="detection")

        resource = self.resources(config)[0]
        dp = resource.load(pathlib.Path(root) / self.name)
        dp = Filter(dp, self._filter_detection_anns, fn_kwargs=dict(config=config))
        dp = Mapper(dp, self._parse_detection_ann, input_col=1)

        return sorted({instance["name"] for _, anns in dp for instance in anns["object"]})
Example #21
    def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
        dp = resource_dps[0]
        dp = Filter(dp, path_comparator("parent.parent.name", self._split_folder_name))
        dp = hint_shuffling(dp)
        dp = hint_sharding(dp)
        return Mapper(dp, self._prepare_sample)
Example #22
    def _generate_categories(self, root: pathlib.Path) -> List[str]:
        config = self.info.make_config(split="train")
        resources = self.resources(config)

        devkit_dp = resources[1].load(root)
        meta_dp = Filter(devkit_dp, path_comparator("name", "cars_meta.mat"))
        _, meta_file = next(iter(meta_dp))

        return list(read_mat(meta_file, squeeze_me=True)["class_names"])
Example #23
    def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
        images_dp, targets_dp = resource_dps
        if self._split == "train":
            targets_dp = Filter(targets_dp, path_comparator("name", "cars_train_annos.mat"))
        targets_dp = StanfordCarsLabelReader(targets_dp)
        dp = Zipper(images_dp, targets_dp)
        dp = hint_shuffling(dp)
        dp = hint_sharding(dp)
        return Mapper(dp, self._prepare_sample)
Example #24
File: voc.py Project: nairbv/vision
    def _make_datapipe(
        self,
        resource_dps: List[IterDataPipe],
        *,
        config: DatasetConfig,
    ) -> IterDataPipe[Dict[str, Any]]:
        archive_dp = resource_dps[0]
        split_dp, images_dp, anns_dp = Demultiplexer(
            archive_dp,
            3,
            functools.partial(self._classify_archive, config=config),
            drop_none=True,
            buffer_size=INFINITE_BUFFER_SIZE,
        )

        split_dp = Filter(split_dp, functools.partial(self._is_in_folder, name=self._SPLIT_FOLDER[config.task]))
        split_dp = Filter(split_dp, path_comparator("name", f"{config.split}.txt"))
        split_dp = LineReader(split_dp, decode=True)
        split_dp = hint_sharding(split_dp)
        split_dp = hint_shuffling(split_dp)

        dp = split_dp
        for level, data_dp in enumerate((images_dp, anns_dp)):
            dp = IterKeyZipper(
                dp,
                data_dp,
                key_fn=getitem(*[0] * level, 1),
                ref_key_fn=path_accessor("stem"),
                buffer_size=INFINITE_BUFFER_SIZE,
            )
        return Mapper(
            dp,
            functools.partial(
                self._prepare_sample,
                prepare_ann_fn=self._prepare_detection_ann
                if config.task == "detection"
                else self._prepare_segmentation_ann,
            ),
        )
Example #25
    def _generate_categories(self) -> List[str]:
        self._year = "2011"
        resources = self._resources()

        dp = resources[0].load(self._root)
        dp = Filter(dp, path_comparator("name", "classes.txt"))
        dp = CSVDictParser(dp, fieldnames=("label", "category"), dialect="cub200")

        return [row["category"].split(".")[1] for row in dp]
Example #26
    def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
        images_dp, anns_dp = resource_dps

        images_dp = Filter(images_dp, self._is_not_background_image)
        images_dp = hint_shuffling(images_dp)
        images_dp = hint_sharding(images_dp)

        anns_dp = Filter(anns_dp, self._is_ann)

        dp = IterKeyZipper(
            images_dp,
            anns_dp,
            key_fn=self._images_key_fn,
            ref_key_fn=self._anns_key_fn,
            buffer_size=INFINITE_BUFFER_SIZE,
            keep_key=True,
        )
        return Mapper(dp, self._prepare_sample)
Example #27
    def _generate_categories(self, root: pathlib.Path) -> List[str]:
        config = self.info.make_config(year="2011")
        resources = self.resources(config)

        dp = resources[0].load(root)
        dp = Filter(dp, path_comparator("name", "classes.txt"))
        dp = CSVDictParser(dp, fieldnames=("label", "category"), dialect="cub200")

        return [row["category"].split(".")[1] for row in dp]
Example #28
    def _generate_categories(self) -> List[Tuple[str, ...]]:
        self._split = "val"
        resources = self._resources()

        devkit_dp = resources[1].load(self._root)
        meta_dp = Filter(devkit_dp, self._filter_meta)
        meta_dp = Mapper(meta_dp, self._extract_categories_and_wnids)

        categories_and_wnids = cast(List[Tuple[str, ...]], next(iter(meta_dp)))
        categories_and_wnids.sort(key=lambda category_and_wnid: category_and_wnid[1])
        return categories_and_wnids
Example #29
    def _make_datapipe(
        self,
        resource_dps: List[IterDataPipe],
        *,
        config: DatasetConfig,
    ) -> IterDataPipe[Dict[str, Any]]:
        dp = resource_dps[0]
        dp = Filter(dp, functools.partial(self._is_data_file, split=config.split))
        dp = Mapper(dp, self._unpickle)
        dp = CifarFileReader(dp, labels_key=self._LABELS_KEY)
        dp = hint_sharding(dp)
        dp = hint_shuffling(dp)
        return Mapper(dp, self._prepare_sample)
Example #30
    def _generate_categories(self, root: pathlib.Path) -> List[Tuple[str, ...]]:
        config = self.info.make_config(split="val")
        resources = self.resources(config)

        devkit_dp = resources[1].load(root)
        meta_dp = Filter(devkit_dp, path_comparator("name", "meta.mat"))
        meta_dp = Mapper(meta_dp, self._extract_categories_and_wnids)

        categories_and_wnids = cast(List[Tuple[str, ...]], next(iter(meta_dp)))
        categories_and_wnids.sort(key=lambda category_and_wnid: category_and_wnid[1])
        return categories_and_wnids