Example 1
0
    def _make_datapipe(
        self,
        resource_dps: List[IterDataPipe],
        *,
        config: DatasetConfig,
        decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
    ) -> IterDataPipe[Dict[str, Any]]:
        """Pair each image file with its CSV annotation row and decode the samples."""
        if config.split == "train":
            # The train resource is a single archive holding both images and
            # annotations; split it into two streams.
            images_dp, ann_dp = Demultiplexer(
                resource_dps[0],
                2,
                self._classify_train_archive,
                drop_none=True,
                buffer_size=INFINITE_BUFFER_SIZE,
            )
        else:
            # Other splits ship images and annotations as separate resources.
            images_dp, ann_dp = resource_dps
            images_dp = Filter(images_dp, path_comparator("suffix", ".ppm"))

        # The order of the image files in the .zip archives perfectly matches the
        # order of the entries in the (possibly concatenated) .csv files, so a
        # plain Zipper suffices instead of an IterKeyZipper.
        ann_dp = CSVDictParser(ann_dp, delimiter=";")
        samples_dp = Zipper(images_dp, ann_dp)

        samples_dp = hint_sharding(samples_dp)
        samples_dp = hint_shuffling(samples_dp)

        return Mapper(samples_dp, partial(self._collate_and_decode, decoder=decoder))
Example 2
0
 def _datapipe(
         self,
         resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
     """Parse the single CSV resource and emit prepared samples."""
     csv_dp = CSVDictParser(resource_dps[0])
     csv_dp = hint_shuffling(csv_dp)
     csv_dp = hint_sharding(csv_dp)
     return Mapper(csv_dp, self._prepare_sample)
Example 3
0
    def _generate_categories(self) -> List[str]:
        """Extract the ordered category names from ``classes.txt``."""
        self._year = "2011"
        resources = self._resources()

        classes_dp = resources[0].load(self._root)
        classes_dp = Filter(classes_dp, path_comparator("name", "classes.txt"))
        classes_dp = CSVDictParser(
            classes_dp, fieldnames=("label", "category"), dialect="cub200"
        )

        # Category values carry a "<label>." prefix; keep only the name after the dot.
        return [row["category"].split(".")[1] for row in classes_dp]
Example 4
0
    def _generate_categories(self, root: pathlib.Path) -> List[str]:
        """Extract the ordered category names from ``classes.txt``."""
        config = self.info.make_config(year="2011")
        resources = self.resources(config)

        classes_dp = resources[0].load(root)
        classes_dp = Filter(classes_dp, path_comparator("name", "classes.txt"))
        classes_dp = CSVDictParser(
            classes_dp, fieldnames=("label", "category"), dialect="cub200"
        )

        # Category values carry a "<label>." prefix; keep only the name after the dot.
        return [row["category"].split(".")[1] for row in classes_dp]
Example 5
0
 def _make_datapipe(
     self,
     resource_dps: List[IterDataPipe],
     *,
     config: DatasetConfig,
     decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
 ) -> IterDataPipe[Dict[str, Any]]:
     """Parse the single CSV resource, then collate and decode each sample."""
     samples_dp = CSVDictParser(resource_dps[0])
     samples_dp = hint_sharding(samples_dp)
     samples_dp = hint_shuffling(samples_dp)
     return Mapper(
         samples_dp,
         functools.partial(self._collate_and_decode_sample, decoder=decoder),
     )
    def _make_datapipe(
        self,
        resource_dps: List[IterDataPipe],
        *,
        config: DatasetConfig,
        decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
    ) -> IterDataPipe[Dict[str, Any]]:
        """Join split/label annotations and segmentation files with the image
        files, then collate and decode each sample.

        Args:
            resource_dps: two datapipes — images first, annotations second.
            config: dataset configuration; ``config.split`` selects the
                ``<split>.txt`` annotation file.
            decoder: optional callable turning a binary stream into a tensor;
                forwarded to ``_collate_and_decode_sample``.
        """
        images_dp, anns_dp = resource_dps

        images_dp = Filter(images_dp, self._filter_images)

        # The annotation resource mixes two kinds of files; route them into
        # separate streams.
        split_and_classification_dp, segmentations_dp = Demultiplexer(
            anns_dp,
            2,
            self._classify_anns,
            drop_none=True,
            buffer_size=INFINITE_BUFFER_SIZE,
        )

        # Keep only the annotation file for the requested split and parse it as
        # space-delimited CSV.
        split_and_classification_dp = Filter(
            split_and_classification_dp,
            path_comparator("name", f"{config.split}.txt"))
        split_and_classification_dp = CSVDictParser(
            split_and_classification_dp,
            fieldnames=("image_id", "label", "species"),
            delimiter=" ")
        split_and_classification_dp = hint_sharding(
            split_and_classification_dp)
        split_and_classification_dp = hint_shuffling(
            split_and_classification_dp)

        segmentations_dp = Filter(segmentations_dp, self._filter_segmentations)

        # Attach each segmentation file to its CSV row, matching the row's
        # "image_id" against the segmentation file's stem.
        anns_dp = IterKeyZipper(
            split_and_classification_dp,
            segmentations_dp,
            key_fn=getitem("image_id"),
            ref_key_fn=path_accessor("stem"),
            buffer_size=INFINITE_BUFFER_SIZE,
        )

        # Finally attach the image file itself, again keyed on "image_id" vs.
        # the image file's stem.
        dp = IterKeyZipper(
            anns_dp,
            images_dp,
            key_fn=getitem(0, "image_id"),
            ref_key_fn=path_accessor("stem"),
            buffer_size=INFINITE_BUFFER_SIZE,
        )
        return Mapper(
            dp,
            functools.partial(self._collate_and_decode_sample,
                              decoder=decoder))
Example 7
0
    def _generate_categories(self) -> List[str]:
        """Derive human-readable category names from the trainval annotations."""
        resources = self._resources()

        anns_dp = resources[1].load(self._root)
        anns_dp = Filter(anns_dp, self._filter_split_and_classification_anns)
        anns_dp = Filter(anns_dp, path_comparator("name", "trainval.txt"))
        anns_dp = CSVDictParser(anns_dp, fieldnames=("image_id", "label"), delimiter=" ")

        # Image ids look like "<raw_category>_<index>"; collect the unique
        # (raw_category, label) pairs.
        pairs = {(row["image_id"].rsplit("_", 1)[0], row["label"]) for row in anns_dp}
        # Order categories by their numeric label, then drop the labels.
        raw_categories, _ = zip(*sorted(pairs, key=lambda pair: int(pair[1])))
        return [
            " ".join(part.title() for part in raw.split("_"))
            for raw in raw_categories
        ]
Example 8
0
 def _generate_categories(self, root: pathlib.Path) -> List[str]:
     """Derive human-readable category names from the split annotations."""
     config = self.default_config
     anns_dp = self.resources(config)[1].load(pathlib.Path(root) / self.name)
     anns_dp = Filter(anns_dp, self._filter_split_and_classification_anns)
     anns_dp = Filter(anns_dp, path_comparator("name", f"{config.split}.txt"))
     anns_dp = CSVDictParser(anns_dp, fieldnames=("image_id", "label"), delimiter=" ")
     # Image ids look like "<raw_category>_<index>"; collect the unique
     # (raw_category, label) pairs.
     pairs = {(row["image_id"].rsplit("_", 1)[0], row["label"]) for row in anns_dp}
     # Order categories by their numeric label, then drop the labels.
     raw_categories, _ = zip(*sorted(pairs, key=lambda pair: int(pair[1])))
     return [
         " ".join(part.title() for part in raw.split("_"))
         for raw in raw_categories
     ]
Example 9
0
    def _datapipe(
            self,
            resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
        """Join split/label annotations and segmentation files with the images,
        then prepare each sample."""
        images_dp, anns_dp = resource_dps

        images_dp = Filter(images_dp, self._filter_images)

        # The annotation resource mixes two kinds of files; route them into
        # separate streams.
        split_dp, segmentations_dp = Demultiplexer(
            anns_dp,
            2,
            self._classify_anns,
            drop_none=True,
            buffer_size=INFINITE_BUFFER_SIZE,
        )

        # Keep only the annotation file for the requested split and parse it as
        # space-delimited CSV.
        split_dp = Filter(split_dp, path_comparator("name", f"{self._split}.txt"))
        split_dp = CSVDictParser(
            split_dp, fieldnames=("image_id", "label", "species"), delimiter=" "
        )
        split_dp = hint_shuffling(split_dp)
        split_dp = hint_sharding(split_dp)

        segmentations_dp = Filter(segmentations_dp, self._filter_segmentations)

        # Attach each segmentation file to its CSV row, matching the row's
        # "image_id" against the segmentation file's stem.
        joined_anns_dp = IterKeyZipper(
            split_dp,
            segmentations_dp,
            key_fn=getitem("image_id"),
            ref_key_fn=path_accessor("stem"),
            buffer_size=INFINITE_BUFFER_SIZE,
        )

        # Finally attach the image file itself, keyed the same way.
        samples_dp = IterKeyZipper(
            joined_anns_dp,
            images_dp,
            key_fn=getitem(0, "image_id"),
            ref_key_fn=path_accessor("stem"),
            buffer_size=INFINITE_BUFFER_SIZE,
        )
        return Mapper(samples_dp, self._prepare_sample)
Example 10
0
    def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
        """Pair each image file with its CSV annotation row and prepare the samples."""
        if self._split == "train":
            # The train resource is a single archive holding both images and
            # annotations; split it into two streams.
            images_dp, ann_dp = Demultiplexer(
                resource_dps[0],
                2,
                self._classify_train_archive,
                drop_none=True,
                buffer_size=INFINITE_BUFFER_SIZE,
            )
        else:
            # Other splits ship images and annotations as separate resources.
            images_dp, ann_dp = resource_dps
            images_dp = Filter(images_dp, path_comparator("suffix", ".ppm"))

        # The order of the image files in the .zip archives perfectly matches the
        # order of the entries in the (possibly concatenated) .csv files, so a
        # plain Zipper suffices instead of an IterKeyZipper.
        ann_dp = CSVDictParser(ann_dp, delimiter=";")
        samples_dp = Zipper(images_dp, ann_dp)

        samples_dp = hint_shuffling(samples_dp)
        samples_dp = hint_sharding(samples_dp)

        return Mapper(samples_dp, self._prepare_sample)