def _make_datapipe(
    self,
    resource_dps: List[IterDataPipe],
    *,
    config: DatasetConfig,
    decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
) -> IterDataPipe[Dict[str, Any]]:
    if config.split == "train":
        images_dp, ann_dp = Demultiplexer(
            resource_dps[0], 2, self._classify_train_archive, drop_none=True, buffer_size=INFINITE_BUFFER_SIZE
        )
    else:
        images_dp, ann_dp = resource_dps

    images_dp = Filter(images_dp, path_comparator("suffix", ".ppm"))

    # The order of the image files in the .zip archives perfectly matches the order of the entries in
    # the (possibly concatenated) .csv files. So we're able to use Zipper here instead of an IterKeyZipper.
    ann_dp = CSVDictParser(ann_dp, delimiter=";")
    dp = Zipper(images_dp, ann_dp)

    dp = hint_sharding(dp)
    dp = hint_shuffling(dp)

    dp = Mapper(dp, partial(self._collate_and_decode, decoder=decoder))
    return dp
def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
    dp = resource_dps[0]
    dp = CSVDictParser(dp)
    dp = hint_shuffling(dp)
    dp = hint_sharding(dp)
    return Mapper(dp, self._prepare_sample)
def _generate_categories(self) -> List[str]:
    self._year = "2011"
    resources = self._resources()

    dp = resources[0].load(self._root)
    dp = Filter(dp, path_comparator("name", "classes.txt"))
    dp = CSVDictParser(dp, fieldnames=("label", "category"), dialect="cub200")

    return [row["category"].split(".")[1] for row in dp]
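# For reference, rows in classes.txt pair a numeric label with a dotted category name,
# e.g. "1 001.Black_footed_Albatross", so splitting the category on "." keeps only the
# human-readable part:
#   "001.Black_footed_Albatross".split(".")[1]  ->  "Black_footed_Albatross"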
def _generate_categories(self, root: pathlib.Path) -> List[str]:
    config = self.info.make_config(year="2011")
    resources = self.resources(config)

    dp = resources[0].load(root)
    dp = Filter(dp, path_comparator("name", "classes.txt"))
    dp = CSVDictParser(dp, fieldnames=("label", "category"), dialect="cub200")

    return [row["category"].split(".")[1] for row in dp]
def _make_datapipe(
    self,
    resource_dps: List[IterDataPipe],
    *,
    config: DatasetConfig,
    decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
) -> IterDataPipe[Dict[str, Any]]:
    dp = resource_dps[0]
    dp = CSVDictParser(dp)
    dp = hint_sharding(dp)
    dp = hint_shuffling(dp)
    return Mapper(dp, functools.partial(self._collate_and_decode_sample, decoder=decoder))
def _make_datapipe(
    self,
    resource_dps: List[IterDataPipe],
    *,
    config: DatasetConfig,
    decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
) -> IterDataPipe[Dict[str, Any]]:
    images_dp, anns_dp = resource_dps

    images_dp = Filter(images_dp, self._filter_images)

    split_and_classification_dp, segmentations_dp = Demultiplexer(
        anns_dp,
        2,
        self._classify_anns,
        drop_none=True,
        buffer_size=INFINITE_BUFFER_SIZE,
    )

    split_and_classification_dp = Filter(split_and_classification_dp, path_comparator("name", f"{config.split}.txt"))
    split_and_classification_dp = CSVDictParser(
        split_and_classification_dp, fieldnames=("image_id", "label", "species"), delimiter=" "
    )
    split_and_classification_dp = hint_sharding(split_and_classification_dp)
    split_and_classification_dp = hint_shuffling(split_and_classification_dp)

    segmentations_dp = Filter(segmentations_dp, self._filter_segmentations)

    anns_dp = IterKeyZipper(
        split_and_classification_dp,
        segmentations_dp,
        key_fn=getitem("image_id"),
        ref_key_fn=path_accessor("stem"),
        buffer_size=INFINITE_BUFFER_SIZE,
    )

    dp = IterKeyZipper(
        anns_dp,
        images_dp,
        key_fn=getitem(0, "image_id"),
        ref_key_fn=path_accessor("stem"),
        buffer_size=INFINITE_BUFFER_SIZE,
    )
    return Mapper(dp, functools.partial(self._collate_and_decode_sample, decoder=decoder))
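# Illustrative note on the two joins above: the first IterKeyZipper pairs each parsed
# CSV row with its segmentation file by matching row["image_id"] against the file's
# stem, e.g. "Abyssinian_100" <-> "Abyssinian_100.png". Each resulting sample is then a
# (row, segmentation) tuple, which is why the second join extracts the key with
# getitem(0, "image_id") instead of getitem("image_id").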
def _generate_categories(self) -> List[str]:
    resources = self._resources()

    dp = resources[1].load(self._root)
    dp = Filter(dp, self._filter_split_and_classification_anns)
    dp = Filter(dp, path_comparator("name", "trainval.txt"))
    dp = CSVDictParser(dp, fieldnames=("image_id", "label"), delimiter=" ")

    raw_categories_and_labels = {(data["image_id"].rsplit("_", 1)[0], data["label"]) for data in dp}
    raw_categories, _ = zip(
        *sorted(raw_categories_and_labels, key=lambda raw_category_and_label: int(raw_category_and_label[1]))
    )
    return [" ".join(part.title() for part in raw_category.split("_")) for raw_category in raw_categories]
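# Worked example of the category normalization above: an image id such as
# "american_bulldog_100" yields the raw category "american_bulldog" via rsplit, which
# the final list comprehension then turns into "American Bulldog":
#   " ".join(part.title() for part in "american_bulldog".split("_"))  ->  "American Bulldog"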
def _generate_categories(self, root: pathlib.Path) -> List[str]:
    config = self.default_config

    dp = self.resources(config)[1].load(pathlib.Path(root) / self.name)
    dp = Filter(dp, self._filter_split_and_classification_anns)
    dp = Filter(dp, path_comparator("name", f"{config.split}.txt"))
    dp = CSVDictParser(dp, fieldnames=("image_id", "label"), delimiter=" ")

    raw_categories_and_labels = {(data["image_id"].rsplit("_", 1)[0], data["label"]) for data in dp}
    raw_categories, _ = zip(
        *sorted(raw_categories_and_labels, key=lambda raw_category_and_label: int(raw_category_and_label[1]))
    )
    return [" ".join(part.title() for part in raw_category.split("_")) for raw_category in raw_categories]
def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
    images_dp, anns_dp = resource_dps

    images_dp = Filter(images_dp, self._filter_images)

    split_and_classification_dp, segmentations_dp = Demultiplexer(
        anns_dp,
        2,
        self._classify_anns,
        drop_none=True,
        buffer_size=INFINITE_BUFFER_SIZE,
    )

    split_and_classification_dp = Filter(split_and_classification_dp, path_comparator("name", f"{self._split}.txt"))
    split_and_classification_dp = CSVDictParser(
        split_and_classification_dp, fieldnames=("image_id", "label", "species"), delimiter=" "
    )
    split_and_classification_dp = hint_shuffling(split_and_classification_dp)
    split_and_classification_dp = hint_sharding(split_and_classification_dp)

    segmentations_dp = Filter(segmentations_dp, self._filter_segmentations)

    anns_dp = IterKeyZipper(
        split_and_classification_dp,
        segmentations_dp,
        key_fn=getitem("image_id"),
        ref_key_fn=path_accessor("stem"),
        buffer_size=INFINITE_BUFFER_SIZE,
    )

    dp = IterKeyZipper(
        anns_dp,
        images_dp,
        key_fn=getitem(0, "image_id"),
        ref_key_fn=path_accessor("stem"),
        buffer_size=INFINITE_BUFFER_SIZE,
    )
    return Mapper(dp, self._prepare_sample)
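# A minimal sketch of the demux classifier assumed above (hypothetical; the actual
# helper is not part of this excerpt). Demultiplexer routes each (path, stream) tuple
# to output 0 or 1, and drop_none=True discards anything mapped to None.
def _classify_anns(self, data: Tuple[str, Any]) -> Optional[int]:
    path = pathlib.Path(data[0])
    if path.suffix == ".txt":
        return 0  # split and classification annotations
    elif path.suffix == ".png":
        return 1  # segmentation trimaps
    return None  # everything else is dropped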
def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
    if self._split == "train":
        images_dp, ann_dp = Demultiplexer(
            resource_dps[0], 2, self._classify_train_archive, drop_none=True, buffer_size=INFINITE_BUFFER_SIZE
        )
    else:
        images_dp, ann_dp = resource_dps

    images_dp = Filter(images_dp, path_comparator("suffix", ".ppm"))

    # The order of the image files in the .zip archives perfectly matches the order of the entries in the
    # (possibly concatenated) .csv files. So we're able to use Zipper here instead of an IterKeyZipper.
    ann_dp = CSVDictParser(ann_dp, delimiter=";")
    dp = Zipper(images_dp, ann_dp)

    dp = hint_shuffling(dp)
    dp = hint_sharding(dp)
    return Mapper(dp, self._prepare_sample)
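# A plausible sketch of self._classify_train_archive (assumed, not shown in this
# excerpt): the train archive mixes .ppm images with per-class .csv annotation files,
# so the classifier splits them by suffix for the Demultiplexer above.
def _classify_train_archive(self, data: Tuple[str, Any]) -> Optional[int]:
    path = pathlib.Path(data[0])
    if path.suffix == ".ppm":
        return 0  # image files
    elif path.suffix == ".csv":
        return 1  # annotation files
    return None  # dropped via drop_none=True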