def __init__(self, root: str, train: bool = True, download: bool = False, **kwargs):
    """Constructor method for the ``ImageClassificationDataset`` class.

    Args:
        root: root directory of dataset
        train: if ``True``, creates dataset from ``train/`` subfolder,
            otherwise from ``val/``
        download: if ``True``, downloads the dataset from the internet
            and puts it in root directory. If dataset is already
            downloaded, it is not downloaded again
        **kwargs: forwarded to the parent dataset constructor
    """
    dataset_dir = os.path.join(root, self.name)

    # Fetch the archives only when requested and not already extracted.
    if download and not os.path.exists(dataset_dir):
        os.makedirs(root, exist_ok=True)
        for url, md5 in self.resources:
            archive_name = url.rpartition("/")[2]
            download_and_extract_archive(
                url, download_root=root, filename=archive_name, md5=md5
            )

    subfolder = "train" if train else "val"
    super().__init__(rootpath=os.path.join(dataset_dir, subfolder), **kwargs)
def download(self) -> None:
    """Fetch and unpack the dataset archive into ``self.root``.

    The download is skipped entirely when the files are already present
    and their checksums verify.
    """
    if not self._check_integrity():
        download_and_extract_archive(
            self.url, self.root, filename=self.filename, md5=self.tgz_md5
        )
        return
    print("Files already downloaded and verified")
def __init__(
    self,
    root: str,
    train: bool = True,
    target_type: str = "bicubic_X4",
    patch_size: Tuple[int, int] = (96, 96),
    transform: Optional[Callable[[Dict], Dict]] = None,
    low_resolution_image_key: str = "lr_image",
    high_resolution_image_key: str = "hr_image",
    download: bool = False,
) -> None:
    """DIV2K super-resolution dataset constructor.

    Args:
        root: root directory where the archives are downloaded/extracted
        train: if ``True``, use the ``train`` split, otherwise ``valid``
        target_type: low-resolution variant, e.g. ``"bicubic_X4"``; the
            trailing digit (when present) is taken as the downscale factor
        patch_size: (height, width) of the high-resolution patches
        transform: optional callable applied to each sample dict;
            defaults to identity
        low_resolution_image_key: output key for the low-resolution image
        high_resolution_image_key: output key for the high-resolution image
        download: if ``True``, download and extract the HR and LR archives

    Raises:
        ValueError: if the number of LR and HR images differs.
    """
    mode = "train" if train else "valid"
    filename_hr = f"DIV2K_{mode}_HR.zip"
    filename_lr = f"DIV2K_{mode}_LR_{target_type}.zip"
    if download:
        # download HR (target) images
        download_and_extract_archive(
            f"{self.url}{filename_hr}",
            download_root=root,
            filename=filename_hr,
            md5=self.resources[filename_hr],
        )
        # download lr (input) images
        download_and_extract_archive(
            f"{self.url}{filename_lr}",
            download_root=root,
            filename=filename_lr,
            md5=self.resources[filename_lr],
        )

    self.train = train
    self.lr_key = low_resolution_image_key
    self.hr_key = high_resolution_image_key

    # 'index' files
    lr_images = self._images_in_dir(Path(root) / Path(filename_lr).stem)
    hr_images = self._images_in_dir(Path(root) / Path(filename_hr).stem)
    # BUGFIX: `assert` is stripped under `python -O`, so a mismatched or
    # partial extraction could silently mis-pair images; validate explicitly.
    if len(lr_images) != len(hr_images):
        raise ValueError(
            f"Number of LR ({len(lr_images)}) and HR ({len(hr_images)}) "
            "images must match"
        )

    self.data = [
        {"lr_image": lr_image, "hr_image": hr_image}
        for lr_image, hr_image in zip(lr_images, hr_images)
    ]

    self.open_fn = data.ReaderCompose([
        data.ImageReader(input_key="lr_image", output_key=self.lr_key),
        data.ImageReader(input_key="hr_image", output_key=self.hr_key),
    ])

    # downscale factor, e.g. 4 for "bicubic_X4"; default to 4 when the
    # target type does not end in a digit
    self.scale = int(target_type[-1]) if target_type[-1].isdigit() else 4
    height, width = patch_size
    self.target_patch_size = patch_size
    self.input_patch_size = (height // self.scale, width // self.scale)
    self.transform = transform if transform is not None else lambda x: x
def _download(self): """Download and extract files/""" if self._check_exists(): return os.makedirs(self.raw_folder, exist_ok=True) os.makedirs(self.processed_folder, exist_ok=True) url = self.resources[0] md5 = self.resources[1] download_and_extract_archive( url=url, download_root=self.raw_folder, filename=self.filename, md5=md5, remove_finished=True, )
def download(self):
    """Download the MNIST data if it doesn't exist in processed_folder."""
    if self._check_exists():
        return

    os.makedirs(self.raw_folder, exist_ok=True)
    os.makedirs(self.processed_folder, exist_ok=True)

    # download files
    for url, md5 in self.resources:
        archive = url.rpartition("/")[2]
        download_and_extract_archive(
            url, download_root=self.raw_folder, filename=archive, md5=md5
        )

    # process and save as torch files
    print("Processing...")

    def _read_pair(images_name, labels_name):
        # parse a raw idx image/label file pair from the raw folder
        return (
            _read_image_file(os.path.join(self.raw_folder, images_name)),
            _read_label_file(os.path.join(self.raw_folder, labels_name)),
        )

    training_set = _read_pair(
        "train-images-idx3-ubyte", "train-labels-idx1-ubyte"
    )
    test_set = _read_pair("t10k-images-idx3-ubyte", "t10k-labels-idx1-ubyte")

    for payload, target_file in (
        (training_set, self.training_file),
        (test_set, self.test_file),
    ):
        with open(os.path.join(self.processed_folder, target_file), "wb") as f:
            torch.save(payload, f)

    print("Done!")