def test_resolve_to_gdrive(self, mocker):
    """Check that resolving an HTTP resource redirected to Google Drive yields a ``GDriveResource``.

    The redirect lookup is patched to return a Drive "view" URL. The resolved
    resource must expose the id embedded in that URL and keep the original
    file name, checksum, and preprocess callable.
    """

    def preprocess_sentinel(path):
        return path

    name = "data.tar"
    drive_id = "id-sentinel"
    checksum = "sha256_sentinel"
    source_url = f"http://downloads.pytorch.org/{name}"
    target_url = f"https://drive.google.com/file/d/{drive_id}/view"

    resource = HttpResource(source_url, sha256=checksum, preprocess=preprocess_sentinel)

    # Stub the redirect lookup so that it reports the Google Drive URL.
    mocker.patch(
        "torchvision.prototype.datasets.utils._resource._get_redirect_url",
        return_value=target_url,
    )
    resolved = resource.resolve()

    assert isinstance(resolved, GDriveResource)
    assert resolved.id == drive_id
    assert resolved.file_name == name
    assert resolved.sha256 == checksum
    assert resolved._preprocess is preprocess_sentinel
def resources(self, config: DatasetConfig) -> List[OnlineResource]:
    """Return the download resources for the release selected by ``config.year``.

    For ``"2011"`` the result is the main archive followed by the
    segmentations archive. For any other year the result is the split lists,
    the images, and the annotations, in that order. Every resource is created
    with ``preprocess="decompress"``.
    """
    if config.year != "2011":
        # The only other case handled here is config.year == "2010".
        return [
            HttpResource(
                "http://www.vision.caltech.edu/visipedia-data/CUB-200/lists.tgz",
                sha256="aeacbd5e3539ae84ea726e8a266a9a119c18f055cd80f3836d5eb4500b005428",
                preprocess="decompress",
            ),
            HttpResource(
                "http://www.vision.caltech.edu/visipedia-data/CUB-200/images.tgz",
                sha256="2a6d2246bbb9778ca03aa94e2e683ccb4f8821a36b7f235c0822e659d60a803e",
                preprocess="decompress",
            ),
            HttpResource(
                "http://www.vision.caltech.edu/visipedia-data/CUB-200/annotations.tgz",
                sha256="c17b7841c21a66aa44ba8fe92369cc95dfc998946081828b1d7b8a4b716805c1",
                preprocess="decompress",
            ),
        ]

    return [
        HttpResource(
            "http://www.vision.caltech.edu/visipedia-data/CUB-200-2011/CUB_200_2011.tgz",
            sha256="0c685df5597a8b24909f6a7c9db6d11e008733779a671760afef78feb49bf081",
            preprocess="decompress",
        ),
        HttpResource(
            "http://www.vision.caltech.edu/visipedia-data/CUB-200-2011/segmentations.tgz",
            sha256="dc77f6cffea0cbe2e41d4201115c8f29a6320ecb04fffd2444f51b8066e4b84f",
            preprocess="decompress",
        ),
    ]
def test_resolve_to_http(self, mocker):
    """Check that resolving an HTTP resource redirected to another HTTP URL stays an ``HttpResource``.

    The redirect lookup is patched to return the ``https`` variant of the
    original URL. The resolved resource must point at the redirected URL and
    keep the original file name, checksum, and preprocess callable.
    """

    def preprocess_sentinel(path):
        return path

    name = "data.tar"
    checksum = "sha256_sentinel"
    source_url = f"http://downloads.pytorch.org/{name}"
    target_url = source_url.replace("http", "https")

    resource = HttpResource(source_url, sha256=checksum, preprocess=preprocess_sentinel)

    # Stub the redirect lookup so that it reports the https URL.
    mocker.patch(
        "torchvision.prototype.datasets.utils._resource._get_redirect_url",
        return_value=target_url,
    )
    resolved = resource.resolve()

    assert isinstance(resolved, HttpResource)
    assert resolved.url == target_url
    assert resolved.file_name == name
    assert resolved.sha256 == checksum
    assert resolved._preprocess is preprocess_sentinel
class USPS(Dataset):
    """USPS dataset, read line by line from bz2-compressed text files.

    Each line holds a label followed by ``index:value`` entries; a sample is a
    16x16 image plus a zero-based label.

    Homepage: https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass.html#usps
    """

    _URL = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass"

    # One downloadable file per split.
    _RESOURCES = {
        "train": HttpResource(
            f"{_URL}/usps.bz2",
            sha256="3771e9dd6ba685185f89867b6e249233dd74652389f263963b3b741e994b034f",
        ),
        "test": HttpResource(
            f"{_URL}/usps.t.bz2",
            sha256="a9c0164e797d60142a50604917f0baa604f326e9a689698763793fa5d12ffc4e",
        ),
    }

    def __init__(
        self,
        root: Union[str, pathlib.Path],
        *,
        split: str = "train",
        skip_integrity_check: bool = False,
    ) -> None:
        """Validate ``split`` (``"train"`` or ``"test"``), load the categories, and initialise the base class."""
        self._split = self._verify_str_arg(split, "split", {"train", "test"})
        self._categories = _info()["categories"]

        super().__init__(root, skip_integrity_check=skip_integrity_check)

    def _resources(self) -> List[OnlineResource]:
        """Return a one-element list holding the resource for the selected split."""
        selected = USPS._RESOURCES[self._split]
        return [selected]

    def _prepare_sample(self, line: str) -> Dict[str, Any]:
        """Turn one text line into a dict with an ``image`` and a ``label`` entry."""
        fields = line.strip().split(" ")
        raw_label, entries = fields[0], fields[1:]

        # Each entry looks like "<index>:<value>"; only the value part is used.
        numbers = []
        for entry in entries:
            numbers.append(float(entry.split(":")[1]))

        # Map every value v to (v + 1) / 2, in place.
        pixels = torch.tensor(numbers)
        pixels.add_(1).div_(2)

        return {
            "image": Image(pixels.reshape(16, 16)),
            # Labels in the file are shifted by one relative to the category index.
            "label": Label(int(raw_label) - 1, categories=self._categories),
        }

    def _datapipe(self, resource_dps: List[IterDataPipe]) -> IterDataPipe[Dict[str, Any]]:
        """Build the sample pipeline: decompress, read lines, add shuffle/shard hints, map to samples."""
        lines = LineReader(Decompressor(resource_dps[0]), decode=True, return_path=False)
        lines = hint_sharding(hint_shuffling(lines))
        return Mapper(lines, self._prepare_sample)

    def __len__(self) -> int:
        """Return 7291 for the train split and 2007 otherwise."""
        if self._split == "train":
            return 7_291
        return 2_007
def resources(self, config: DatasetConfig) -> List[OnlineResource]:
    """Return the images archive and the annotations archive for ``config``.

    The images URL is built from ``config.split`` and ``config.year``; the
    annotations URL depends on ``config.year`` only. Checksums are looked up
    in ``self._IMAGES_CHECKSUMS`` and ``self._META_CHECKSUMS``.
    """
    return [
        HttpResource(
            f"{self._IMAGE_URL_BASE}/{config.split}{config.year}.zip",
            sha256=self._IMAGES_CHECKSUMS[(config.year, config.split)],
        ),
        HttpResource(
            f"{self._META_URL_BASE}/annotations_trainval{config.year}.zip",
            sha256=self._META_CHECKSUMS[config.year],
        ),
    ]
def _resources(self) -> List[OnlineResource]:
    """Return the images archive and the annotations archive for this instance.

    The images URL is built from ``self._split`` and ``self._year``; the
    annotations URL depends on ``self._year`` only. Checksums are looked up in
    ``self._IMAGES_CHECKSUMS`` and ``self._META_CHECKSUMS``.
    """
    return [
        HttpResource(
            f"{self._IMAGE_URL_BASE}/{self._split}{self._year}.zip",
            sha256=self._IMAGES_CHECKSUMS[(self._year, self._split)],
        ),
        HttpResource(
            f"{self._META_URL_BASE}/annotations_trainval{self._year}.zip",
            sha256=self._META_CHECKSUMS[self._year],
        ),
    ]
def _resources(self) -> List[OnlineResource]:
    """Return the benchmark archive followed by the extra ``train_noval.txt`` split file."""
    return [
        HttpResource(
            "https://www2.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz",
            sha256="6a5a2918d5c73ce032fdeba876574d150d9d04113ab87540a1304cbcc715be53",
        ),
        HttpResource(
            "http://home.bharathh.info/pubs/codes/SBD/train_noval.txt",
            sha256="0b2068f7a359d2907431803e1cd63bf6162da37d7d503b589d3b08c6fd0c2432",
        ),
    ]
def resources(self, config: DatasetConfig) -> List[OnlineResource]:
    """Return the object-categories archive followed by the annotations archive.

    ``config`` is accepted for interface compatibility and is not consulted.
    """
    return [
        HttpResource(
            "http://www.vision.caltech.edu/Image_Datasets/Caltech101/101_ObjectCategories.tar.gz",
            sha256="af6ece2f339791ca20f855943d8b55dd60892c0a25105fcd631ee3d6430f9926",
        ),
        HttpResource(
            "http://www.vision.caltech.edu/Image_Datasets/Caltech101/Annotations.tar",
            sha256="1717f4e10aa837b05956e3f4c94456527b143eec0d95e935028b30aff40663d8",
        ),
    ]
def resources(self, config: DatasetConfig) -> List[OnlineResource]:
    """Return the split's main resource plus one companion resource.

    The companion is the ``"car_devkit"`` entry for the train split and the
    ``"cars_test_annos_withlabels"`` entry for every other split. URLs and
    checksums are read from ``self._URLS`` and ``self._CHECKSUM``.
    """
    main = HttpResource(self._URLS[config.split], sha256=self._CHECKSUM[config.split])

    if config.split == "train":
        companion = HttpResource(url=self._URLS["car_devkit"], sha256=self._CHECKSUM["car_devkit"])
    else:
        companion = HttpResource(
            self._URLS["cars_test_annos_withlabels"],
            sha256=self._CHECKSUM["cars_test_annos_withlabels"],
        )

    return [main, companion]
def _resources(self) -> List[OnlineResource]:
    """Return the images archive followed by the annotations archive.

    Both resources are created with ``preprocess="decompress"``.
    """
    return [
        HttpResource(
            "https://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz",
            sha256="67195c5e1c01f1ab5f9b6a5d22b8c27a580d896ece458917e61d459337fa318d",
            preprocess="decompress",
        ),
        HttpResource(
            "https://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz",
            sha256="52425fb6de5c424942b7626b428656fcbd798db970a937df61750c0f1d358e91",
            preprocess="decompress",
        ),
    ]
def _resources(self) -> List[OnlineResource]:
    """Return the split's main resource plus one companion resource.

    The companion is the ``"car_devkit"`` entry for the train split and the
    ``"cars_test_annos_withlabels"`` entry for every other split. URLs and
    checksums are read from ``self._URLS`` and ``self._CHECKSUM``.
    """
    main = HttpResource(self._URLS[self._split], sha256=self._CHECKSUM[self._split])

    if self._split == "train":
        companion = HttpResource(url=self._URLS["car_devkit"], sha256=self._CHECKSUM["car_devkit"])
    else:
        companion = HttpResource(
            self._URLS["cars_test_annos_withlabels"],
            sha256=self._CHECKSUM["cars_test_annos_withlabels"],
        )

    return [main, companion]
def _resources(self) -> List[OnlineResource]:
    """Return the resource for the selected split, plus the ground truth for ``"test"``.

    URLs and checksums are read from ``self._URLS`` and ``self._CHECKSUMS``.
    Only the test split gets the additional ``"test_ground_truth"`` resource.
    """
    primary = HttpResource(self._URLS[self._split], sha256=self._CHECKSUMS[self._split])

    if self._split != "test":
        return [primary]

    ground_truth = HttpResource(
        self._URLS["test_ground_truth"],
        sha256=self._CHECKSUMS["test_ground_truth"],
    )
    return [primary, ground_truth]
def resources(self, config: DatasetConfig) -> List[OnlineResource]:
    """Return the resource for ``config.split``, plus the ground truth for ``"test"``.

    URLs and checksums are read from ``self._URLS`` and ``self._CHECKSUMS``.
    Only the test split gets the additional ``"test_ground_truth"`` resource.
    """
    primary = HttpResource(self._URLS[config.split], sha256=self._CHECKSUMS[config.split])

    if config.split != "test":
        return [primary]

    ground_truth = HttpResource(
        self._URLS["test_ground_truth"],
        sha256=self._CHECKSUMS["test_ground_truth"],
    )
    return [primary, ground_truth]
class USPS(Dataset):
    """USPS dataset, read line by line from bz2-compressed text files.

    Each line holds a label followed by ``index:value`` entries; a sample is a
    16x16 image plus a zero-based label.
    """

    _URL = "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass"

    # One downloadable file per split.
    _RESOURCES = {
        "train": HttpResource(
            f"{_URL}/usps.bz2",
            sha256="3771e9dd6ba685185f89867b6e249233dd74652389f263963b3b741e994b034f",
        ),
        "test": HttpResource(
            f"{_URL}/usps.t.bz2",
            sha256="a9c0164e797d60142a50604917f0baa604f326e9a689698763793fa5d12ffc4e",
        ),
    }

    def _make_info(self) -> DatasetInfo:
        """Describe the dataset: name, homepage, the valid ``split`` options, and 10 categories."""
        return DatasetInfo(
            "usps",
            homepage="https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/multiclass.html#usps",
            valid_options={"split": ("train", "test")},
            categories=10,
        )

    def resources(self, config: DatasetConfig) -> List[OnlineResource]:
        """Return a one-element list holding the resource for ``config.split``."""
        selected = USPS._RESOURCES[config.split]
        return [selected]

    def _prepare_sample(self, line: str) -> Dict[str, Any]:
        """Turn one text line into a dict with an ``image`` and a ``label`` entry."""
        fields = line.strip().split(" ")
        raw_label, entries = fields[0], fields[1:]

        # Each entry looks like "<index>:<value>"; only the value part is used.
        numbers = []
        for entry in entries:
            numbers.append(float(entry.split(":")[1]))

        # Map every value v to (v + 1) / 2, in place.
        pixels = torch.tensor(numbers)
        pixels.add_(1).div_(2)

        return {
            "image": Image(pixels.reshape(16, 16)),
            # Labels in the file are shifted by one relative to the category index.
            "label": Label(int(raw_label) - 1, categories=self.categories),
        }

    def _make_datapipe(
        self,
        resource_dps: List[IterDataPipe],
        *,
        config: DatasetConfig,
    ) -> IterDataPipe[Dict[str, Any]]:
        """Build the sample pipeline: decompress, read lines, add shuffle/shard hints, map to samples.

        ``config`` is part of the interface but is not consulted here.
        """
        lines = LineReader(Decompressor(resource_dps[0]), decode=True, return_path=False)
        lines = hint_sharding(hint_shuffling(lines))
        return Mapper(lines, self._prepare_sample)
def _resources(self) -> List[OnlineResource]:
    """Return a one-element list with the ``<split>_32x32.mat`` file for the selected split.

    The checksum is looked up in ``self._CHECKSUMS`` by split name.
    """
    return [
        HttpResource(
            f"http://ufldl.stanford.edu/housenumbers/{self._split}_32x32.mat",
            sha256=self._CHECKSUMS[self._split],
        )
    ]
def resources(self, config: DatasetConfig) -> List[OnlineResource]:
    """Return a one-element list with the ``dtd-r1.0.1.tar.gz`` archive.

    ``config`` is accepted for interface compatibility and is not consulted.
    """
    # NOTE(review): this call passes ``decompress=True`` while other resource
    # definitions pass ``preprocess="decompress"`` -- confirm which keyword the
    # HttpResource version in use accepts.
    return [
        HttpResource(
            "https://www.robots.ox.ac.uk/~vgg/data/dtd/download/dtd-r1.0.1.tar.gz",
            sha256="e42855a52a4950a3b59612834602aa253914755c95b0cff9ead6d07395f8e205",
            decompress=True,
        )
    ]
def resources(self, config: DatasetConfig) -> List[OnlineResource]:
    """Return a one-element list with the ``country211.tgz`` archive.

    ``config`` is accepted for interface compatibility and is not consulted.
    """
    archive = HttpResource(
        "https://openaipublic.azureedge.net/clip/data/country211.tgz",
        sha256="c011343cdc1296a8c31ff1d7129cf0b5e5b8605462cffd24f89266d6e6f4da3c",
    )
    return [archive]
def resources(self, config: DatasetConfig) -> List[OnlineResource]:
    """Return a one-element list with the ``CLEVR_v1.0.zip`` archive.

    ``config`` is accepted for interface compatibility and is not consulted.
    """
    return [
        HttpResource(
            "https://dl.fbaipublicfiles.com/clevr/CLEVR_v1.0.zip",
            sha256="5cd61cf1096ed20944df93c9adb31e74d189b8459a94f54ba00090e5c59936d1",
        )
    ]
def resources(self, config: DatasetConfig) -> List[OnlineResource]:
    """Return a one-element list with the ``semeion.data`` file.

    ``config`` is accepted for interface compatibility and is not consulted.
    """
    return [
        HttpResource(
            "http://archive.ics.uci.edu/ml/machine-learning-databases/semeion/semeion.data",
            sha256="f43228ae3da5ea6a3c95069d53450b86166770e3b719dcc333182128fe08d4b1",
        )
    ]
def _resources(self) -> List[OnlineResource]:
    """Return a one-element list with the archive named by ``self._FILE_NAME``.

    The expected checksum is taken from ``self._SHA256``.
    """
    archive = HttpResource(
        f"https://www.cs.toronto.edu/~kriz/{self._FILE_NAME}",
        sha256=self._SHA256,
    )
    return [archive]
def resources(self, config: DatasetConfig) -> List[OnlineResource]:
    """Return a one-element list with the archive for ``config.split`` and ``config.year``.

    The test split reads its file name and checksum from
    ``self._TEST_ARCHIVES``; every other split uses
    ``self._TRAIN_VAL_ARCHIVES``. Both tables are keyed by year.
    """
    if config.split == "test":
        archives = self._TEST_ARCHIVES
    else:
        archives = self._TRAIN_VAL_ARCHIVES
    file_name, sha256 = archives[config.year]

    return [
        HttpResource(
            f"http://host.robots.ox.ac.uk/pascal/VOC/voc{config.year}/{file_name}",
            sha256=sha256,
        )
    ]
def resources(self, config: Optional[DatasetConfig] = None) -> List[OnlineResource]:
    """Return a one-element list with the ``emnist-gzip.zip`` archive under ``self._URL_BASE``.

    ``config`` is optional and is not consulted.
    """
    archive = HttpResource(
        f"{self._URL_BASE}/emnist-gzip.zip",
        sha256="909a2a39c5e86bdd7662425e9b9c4a49bb582bf8d0edad427f3c3a9d0c6f7259",
    )
    return [archive]
def resources(self, config: DatasetConfig) -> List[OnlineResource]:
    """Return a one-element list with the ``<split>_32x32.mat`` file for ``config.split``.

    The checksum is looked up in ``self._CHECKSUMS`` by split name.
    """
    return [
        HttpResource(
            f"http://ufldl.stanford.edu/housenumbers/{config.split}_32x32.mat",
            sha256=self._CHECKSUMS[config.split],
        )
    ]
def resources(self, config: DatasetConfig) -> List[OnlineResource]:
    """Return a one-element list with the ``cifar-100-python.tar.gz`` archive.

    ``config`` is accepted for interface compatibility and is not consulted.
    """
    archive = HttpResource(
        "https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz",
        sha256="85cd44d02ba6437773c5bbd22e183051d648de2e7d6b014e1ef29b855ba677a7",
    )
    return [archive]
def resources(self, config: DatasetConfig) -> List[OnlineResource]:
    """Return a one-element list with the ``EuroSAT.zip`` archive.

    ``config`` is accepted for interface compatibility and is not consulted.
    """
    archive = HttpResource(
        "https://madm.dfki.de/files/sentinel/EuroSAT.zip",
        sha256="8ebea626349354c5328b142b96d0430e647051f26efc2dc974c843f25ecf70bd",
    )
    return [archive]
def _resources(self) -> List[OnlineResource]:
    """Return a one-element list with the ``emnist-gzip.zip`` archive under ``self._URL_BASE``."""
    archive = HttpResource(
        f"{self._URL_BASE}/emnist-gzip.zip",
        sha256="909a2a39c5e86bdd7662425e9b9c4a49bb582bf8d0edad427f3c3a9d0c6f7259",
    )
    return [archive]
def resources(self, config: DatasetConfig) -> List[OnlineResource]:
    """Return a one-element list with the ``256_ObjectCategories.tar`` archive.

    ``config`` is accepted for interface compatibility and is not consulted.
    """
    archive = HttpResource(
        "http://www.vision.caltech.edu/Image_Datasets/Caltech256/256_ObjectCategories.tar",
        sha256="08ff01b03c65566014ae88eb0490dbe4419fc7ac4de726ee1163e39fd809543e",
    )
    return [archive]
def resources(self, config: DatasetConfig) -> List[OnlineResource]:
    """Return the images resource followed by the labels resource.

    File names and checksums come from ``self._files_and_checksums(config)``.
    ``self._URL_BASE`` may be a single string or a sequence of base URLs: the
    first base gives the primary URL and the remaining ones are passed on as
    mirrors.
    """
    (images_file, images_sha256), (labels_file, labels_sha256) = self._files_and_checksums(config)

    bases = self._URL_BASE
    if isinstance(bases, str):
        # Wrap a lone base URL so it can be handled like a sequence of bases.
        bases = (bases,)

    def from_bases(file_name, sha256):
        # One candidate URL per base; the first is primary, the rest are mirrors.
        urls = [f"{base}/{file_name}" for base in bases]
        return HttpResource(urls[0], sha256=sha256, mirrors=urls[1:])

    return [
        from_bases(images_file, images_sha256),
        from_bases(labels_file, labels_sha256),
    ]
def resources(self, config: DatasetConfig) -> List[OnlineResource]:
    """Return a one-element list with the ``cifar-10-python.tar.gz`` archive.

    ``config`` is accepted for interface compatibility and is not consulted.
    """
    archive = HttpResource(
        "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz",
        sha256="6d958be074577803d12ecdefd02955f39262c83c16fe9348329d7fe0b5c001ce",
    )
    return [archive]
def _resources(self) -> List[OnlineResource]:
    """Return a one-element list with the ``food-101.tar.gz`` archive.

    The resource is created with ``preprocess="decompress"``.
    """
    archive = HttpResource(
        url="http://data.vision.ee.ethz.ch/cvl/food-101.tar.gz",
        sha256="d97d15e438b7f4498f96086a4f7e2fa42a32f2712e87d3295441b2b6314053a4",
        preprocess="decompress",
    )
    return [archive]