def _make_datapipe(
    self,
    resource_dps: List[IterDataPipe],
    *,
    config: DatasetConfig,
    decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
) -> IterDataPipe[Dict[str, Any]]:
    """Assemble the sample datapipe from the single archive resource.

    The archive is demultiplexed into three branches by
    ``self._classify_archive`` (entries classified as ``None`` are dropped).
    The split listing selected by ``config.split`` and ``config.fold`` is read
    line by line, shuffled and passed through ``hint_sharding``. It is then
    joined with the space-delimited category rows and afterwards with the
    image branch.

    Args:
        resource_dps: Resource datapipes; only the first one is used.
        config: Supplies ``split`` and ``fold``, which select the split file.
        decoder: Forwarded to ``self._collate_and_decode_sample``.

    Returns:
        A datapipe that maps every joined item through
        ``self._collate_and_decode_sample``.
    """
    split_listing_dp, categories_dp, image_files_dp = Demultiplexer(
        resource_dps[0],
        3,
        self._classify_archive,
        drop_none=True,
        buffer_size=INFINITE_BUFFER_SIZE,
    )

    # Keep only the listing file of the requested split/fold combination.
    split_file_name = f"{config.split}{config.fold}.txt"
    split_listing_dp = Filter(split_listing_dp, path_comparator("name", split_file_name))
    split_listing_dp = LineReader(split_listing_dp, decode=True, return_path=False)
    split_listing_dp = Shuffler(split_listing_dp, buffer_size=INFINITE_BUFFER_SIZE)
    split_listing_dp = hint_sharding(split_listing_dp)

    categories_dp = CSVParser(categories_dp, delimiter=" ")

    # First join: each split line is matched against column 0 of a category row.
    with_categories_dp = IterKeyZipper(
        split_listing_dp,
        categories_dp,
        key_fn=getitem(),
        ref_key_fn=getitem(0),
        buffer_size=INFINITE_BUFFER_SIZE,
    )
    # Second join: element 0 of the pair above is matched against the key
    # that ``self._image_key_fn`` derives from each image entry.
    with_images_dp = IterKeyZipper(
        with_categories_dp,
        image_files_dp,
        key_fn=getitem(0),
        ref_key_fn=self._image_key_fn,
        buffer_size=INFINITE_BUFFER_SIZE,
    )

    collate = functools.partial(self._collate_and_decode_sample, decoder=decoder)
    return Mapper(with_images_dp, collate)
def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
    """Build the sample datapipe from the first resource.

    The resource is parsed as space-delimited CSV, passed through
    ``hint_shuffling`` and then ``hint_sharding``, and finally every row is
    mapped through ``self._prepare_sample``.

    Args:
        resource_dps: Resource datapipes; only the first one is used.

    Returns:
        The datapipe of prepared samples.
    """
    rows_dp = CSVParser(resource_dps[0], delimiter=" ")
    # The shuffle hint is applied first, the sharding hint second.
    rows_dp = hint_sharding(hint_shuffling(rows_dp))
    return Mapper(rows_dp, self._prepare_sample)
def _make_datapipe(
    self,
    resource_dps: List[IterDataPipe],
    *,
    config: DatasetConfig,
    decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
) -> IterDataPipe[Dict[str, Any]]:
    """Build the sample datapipe from the first resource.

    The resource is parsed as space-delimited CSV, passed through the
    shuffling and sharding hints, and every row is mapped through
    ``self._collate_and_decode_sample``.

    Args:
        resource_dps: Resource datapipes; only the first one is used.
        config: Dataset configuration; not read by this method.
        decoder: Forwarded to ``self._collate_and_decode_sample``.

    Returns:
        The datapipe of collated (and optionally decoded) samples.
    """
    dp = resource_dps[0]
    dp = CSVParser(dp, delimiter=" ")
    # Shuffle first, shard second, as the other `_datapipe` pipelines in this
    # file do. Sharding before shuffling would fix which rows land in each
    # shard and only permute rows inside that shard.
    dp = hint_shuffling(dp)
    dp = hint_sharding(dp)
    return Mapper(dp, functools.partial(self._collate_and_decode_sample, decoder=decoder))
def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
    """Assemble the sample datapipe from the single archive resource.

    ``self._classify_archive`` routes every archive entry into one of three
    branches (entries classified as ``None`` are dropped). The split listing
    named after ``self._split`` and ``self._fold`` is read line by line and
    passed through the shuffling and sharding hints. It is then joined with
    the space-delimited category rows and afterwards with the image branch.

    Args:
        resource_dps: Resource datapipes; only the first one is used.

    Returns:
        A datapipe that maps every joined item through ``self._prepare_sample``.
    """
    listing_dp, category_rows_dp, image_entries_dp = Demultiplexer(
        resource_dps[0],
        3,
        self._classify_archive,
        drop_none=True,
        buffer_size=INFINITE_BUFFER_SIZE,
    )

    # Only the listing file of the configured split/fold is kept.
    is_requested_listing = path_comparator("name", f"{self._split}{self._fold}.txt")
    listing_dp = Filter(listing_dp, is_requested_listing)
    listing_dp = LineReader(listing_dp, decode=True, return_path=False)
    listing_dp = hint_sharding(hint_shuffling(listing_dp))

    category_rows_dp = CSVParser(category_rows_dp, delimiter=" ")

    # Join 1: a split line is matched against column 0 of a category row.
    joined_dp = IterKeyZipper(
        listing_dp,
        category_rows_dp,
        key_fn=getitem(),
        ref_key_fn=getitem(0),
        buffer_size=INFINITE_BUFFER_SIZE,
    )
    # Join 2: element 0 of the pair above is matched against the key that
    # ``self._image_key_fn`` derives from each image entry.
    joined_dp = IterKeyZipper(
        joined_dp,
        image_entries_dp,
        key_fn=getitem(0),
        ref_key_fn=self._image_key_fn,
        buffer_size=INFINITE_BUFFER_SIZE,
    )
    return Mapper(joined_dp, self._prepare_sample)
def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
    """Assemble the sample datapipe for the configured dataset year.

    For ``self._year == "2011"`` two resources are expected: an archive, which
    is demultiplexed into images, split, image-file listing and bounding
    boxes, and a segmentations datapipe. Any other year is handled as the
    2010 layout, which expects three resources: split, images and annotations.

    Both branches produce a split, an images and an annotations datapipe.
    The split is passed through the shuffling and sharding hints, then joined
    with the images and afterwards with the annotations.

    Args:
        resource_dps: Resource datapipes, two for 2011 and three otherwise.

    Returns:
        A datapipe that maps every joined item through ``self._prepare_sample``
        with the year-specific ``prepare_ann_fn`` bound.
    """
    prepare_ann_fn: Callable
    if self._year == "2011":
        archive_dp, segmentations_dp = resource_dps
        images_dp, split_dp, image_files_dp, bounding_boxes_dp = Demultiplexer(
            archive_dp,
            4,
            self._2011_classify_archive,
            drop_none=True,
            buffer_size=INFINITE_BUFFER_SIZE,
        )

        # Built eagerly while the pipeline is constructed: image id -> last
        # component of the listed POSIX path.
        image_files_dp = CSVParser(image_files_dp, dialect="cub200")
        image_files_map = {
            image_id: rel_posix_path.rsplit("/", maxsplit=1)[1]
            for image_id, rel_posix_path in image_files_dp
        }
        lookup_file_name = image_files_map.get

        split_dp = CSVParser(split_dp, dialect="cub200")
        split_dp = Filter(split_dp, self._2011_filter_split)
        split_dp = Mapper(split_dp, getitem(0))
        split_dp = Mapper(split_dp, lookup_file_name)

        # Column 0 of each bounding-box row is replaced via the same lookup.
        bounding_boxes_dp = CSVParser(bounding_boxes_dp, dialect="cub200")
        bounding_boxes_dp = Mapper(bounding_boxes_dp, lookup_file_name, input_col=0)

        anns_dp = IterKeyZipper(
            bounding_boxes_dp,
            segmentations_dp,
            key_fn=getitem(0),
            ref_key_fn=self._2011_segmentation_key,
            keep_key=True,
            buffer_size=INFINITE_BUFFER_SIZE,
        )

        prepare_ann_fn = self._2011_prepare_ann
    else:  # self._year == "2010"
        split_dp, images_dp, anns_dp = resource_dps

        split_file_filter = path_comparator("name", f"{self._split}.txt")
        split_dp = Filter(split_dp, split_file_filter)
        split_dp = LineReader(split_dp, decode=True, return_path=False)
        split_dp = Mapper(split_dp, self._2010_split_key)

        anns_dp = Mapper(anns_dp, self._2010_anns_key)

        prepare_ann_fn = self._2010_prepare_ann

    split_dp = hint_sharding(hint_shuffling(split_dp))

    # Join with the images by the "name" of each image path, then with the
    # annotations by element 0 of the resulting pair.
    samples_dp = IterKeyZipper(
        split_dp,
        images_dp,
        key_fn=getitem(),
        ref_key_fn=path_accessor("name"),
        buffer_size=INFINITE_BUFFER_SIZE,
    )
    samples_dp = IterKeyZipper(
        samples_dp,
        anns_dp,
        key_fn=getitem(0),
        buffer_size=INFINITE_BUFFER_SIZE,
    )

    prepare = functools.partial(self._prepare_sample, prepare_ann_fn=prepare_ann_fn)
    return Mapper(samples_dp, prepare)
def _make_datapipe(
    self,
    resource_dps: List[IterDataPipe],
    *,
    config: DatasetConfig,
    decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
) -> IterDataPipe[Dict[str, Any]]:
    """Assemble the sample datapipe for the dataset year in ``config``.

    For ``config.year == "2011"`` two resources are expected: an archive,
    which is demultiplexed into images, split, image-file listing and bounding
    boxes, and a segmentations datapipe. Any other year is handled as the
    2010 layout, which expects three resources: split, images and annotations.

    Both branches produce a split, an images and an annotations datapipe.
    The split is passed through the shuffling and sharding hints, then joined
    with the images and afterwards with the annotations.

    Args:
        resource_dps: Resource datapipes, two for 2011 and three otherwise.
        config: Supplies ``year`` (layout selection) and ``split``.
        decoder: Forwarded to ``self._collate_and_decode_sample``.

    Returns:
        A datapipe that maps every joined item through
        ``self._collate_and_decode_sample`` with ``year`` and ``decoder`` bound.
    """
    if config.year == "2011":
        archive_dp, segmentations_dp = resource_dps
        images_dp, split_dp, image_files_dp, bounding_boxes_dp = Demultiplexer(
            archive_dp,
            4,
            self._2011_classify_archive,
            drop_none=True,
            buffer_size=INFINITE_BUFFER_SIZE,
        )

        # Built eagerly while the pipeline is constructed: image id -> last
        # component of the listed POSIX path.
        image_files_dp = CSVParser(image_files_dp, dialect="cub200")
        image_files_map = {
            image_id: rel_posix_path.rsplit("/", maxsplit=1)[1]
            for image_id, rel_posix_path in image_files_dp
        }

        split_dp = CSVParser(split_dp, dialect="cub200")
        split_dp = Filter(split_dp, functools.partial(self._2011_filter_split, split=config.split))
        split_dp = Mapper(split_dp, getitem(0))
        split_dp = Mapper(split_dp, image_files_map.get)

        # Column 0 of each bounding-box row is replaced via the same lookup.
        bounding_boxes_dp = CSVParser(bounding_boxes_dp, dialect="cub200")
        bounding_boxes_dp = Mapper(bounding_boxes_dp, image_files_map.get, input_col=0)

        anns_dp = IterKeyZipper(
            bounding_boxes_dp,
            segmentations_dp,
            key_fn=getitem(0),
            ref_key_fn=self._2011_segmentation_key,
            keep_key=True,
            buffer_size=INFINITE_BUFFER_SIZE,
        )
    else:  # config.year == "2010"
        split_dp, images_dp, anns_dp = resource_dps

        split_dp = Filter(split_dp, path_comparator("name", f"{config.split}.txt"))
        split_dp = LineReader(split_dp, decode=True, return_path=False)
        split_dp = Mapper(split_dp, self._2010_split_key)

        anns_dp = Mapper(anns_dp, self._2010_anns_key)

    # Shuffle first, shard second, as the `_datapipe` pipelines in this file
    # do. Sharding before shuffling would fix which entries land in each shard
    # and only permute entries inside that shard.
    split_dp = hint_shuffling(split_dp)
    split_dp = hint_sharding(split_dp)

    # Join with the images by the "name" of each image path, then with the
    # annotations by element 0 of the resulting pair.
    dp = IterKeyZipper(
        split_dp,
        images_dp,
        key_fn=getitem(),
        ref_key_fn=path_accessor("name"),
        buffer_size=INFINITE_BUFFER_SIZE,
    )
    dp = IterKeyZipper(
        dp,
        anns_dp,
        key_fn=getitem(0),
        buffer_size=INFINITE_BUFFER_SIZE,
    )
    return Mapper(
        dp,
        functools.partial(self._collate_and_decode_sample, year=config.year, decoder=decoder),
    )