コード例 #1
0
ファイル: fastai.py プロジェクト: tobycollins/catalyst
    def __init__(self,
                 root: str,
                 train: bool = True,
                 download: bool = False,
                 **kwargs):
        """Constructor method for the ``ImageClassificationDataset`` class.

        Args:
            root: root directory of dataset
            train: if ``True``, the dataset is read from the ``train/``
                subfolder of ``<root>/<self.name>``, otherwise from ``val/``
            download: if ``True`` and ``<root>/<self.name>`` does not exist
                yet, every ``(url, md5)`` pair in ``self.resources`` is
                passed to ``download_and_extract_archive`` with ``root``
                as the download directory
            **kwargs: forwarded unchanged to the parent class constructor
        """
        # download dataset if needed (skipped when the dataset folder exists)
        if download:
            if not os.path.exists(os.path.join(root, self.name)):
                os.makedirs(root, exist_ok=True)

                for archive_url, checksum in self.resources:
                    # the archive is stored under the last URL path segment
                    _, _, archive_name = archive_url.rpartition("/")
                    download_and_extract_archive(
                        archive_url,
                        download_root=root,
                        filename=archive_name,
                        md5=checksum,
                    )

        if train:
            subfolder = "train"
        else:
            subfolder = "val"

        super().__init__(
            rootpath=os.path.join(root, self.name, subfolder),
            **kwargs,
        )
コード例 #2
0
 def download(self) -> None:
     """Download and extract the dataset archive unless it is already valid.

     When ``self._check_integrity()`` returns a truthy value, a notice is
     printed and nothing is fetched. Otherwise ``self.url`` and
     ``self.root`` are passed positionally to
     ``download_and_extract_archive`` together with ``self.filename`` as
     ``filename`` and ``self.tgz_md5`` as ``md5``.
     """
     if not self._check_integrity():
         download_and_extract_archive(
             self.url,
             self.root,
             filename=self.filename,
             md5=self.tgz_md5,
         )
     else:
         print("Files already downloaded and verified")
コード例 #3
0
    def __init__(
        self,
        root: str,
        train: bool = True,
        target_type: str = "bicubic_X4",
        patch_size: Tuple[int, int] = (96, 96),
        transform: Optional[Callable[[Dict], Dict]] = None,
        low_resolution_image_key: str = "lr_image",
        high_resolution_image_key: str = "hr_image",
        download: bool = False,
    ) -> None:
        """Index paired low/high resolution images found under ``root``.

        Args:
            root: directory holding (or receiving) the extracted archives
            train: if ``True`` the ``train`` archives are used,
                otherwise the ``valid`` ones
            target_type: suffix of the low resolution archive name; when
                its last character is a digit that digit becomes
                ``self.scale``, otherwise the scale defaults to 4
            patch_size: ``(height, width)`` stored as
                ``self.target_patch_size``; both values are floor-divided
                by the scale to obtain ``self.input_patch_size``
            transform: callable stored as ``self.transform``; an identity
                function is used when ``None``
            low_resolution_image_key: output key of the low resolution
                image reader
            high_resolution_image_key: output key of the high resolution
                image reader
            download: if ``True``, the HR and then the LR archive are
                passed to ``download_and_extract_archive``
        """
        split = "train" if train else "valid"
        filename_hr = f"DIV2K_{split}_HR.zip"
        filename_lr = f"DIV2K_{split}_LR_{target_type}.zip"

        if download:
            # HR (target) images first, then LR (input) images
            for archive in (filename_hr, filename_lr):
                download_and_extract_archive(
                    f"{self.url}{archive}",
                    download_root=root,
                    filename=archive,
                    md5=self.resources[archive],
                )

        self.train = train
        self.lr_key = low_resolution_image_key
        self.hr_key = high_resolution_image_key

        # 'index' files: each archive's images are looked up in a folder
        # named after the archive without its extension
        lr_images = self._images_in_dir(Path(root) / Path(filename_lr).stem)
        hr_images = self._images_in_dir(Path(root) / Path(filename_hr).stem)
        assert len(lr_images) == len(hr_images)

        samples = []
        for lr_path, hr_path in zip(lr_images, hr_images):
            samples.append({"lr_image": lr_path, "hr_image": hr_path})
        self.data = samples

        lr_reader = data.ImageReader(
            input_key="lr_image", output_key=self.lr_key
        )
        hr_reader = data.ImageReader(
            input_key="hr_image", output_key=self.hr_key
        )
        self.open_fn = data.ReaderCompose([lr_reader, hr_reader])

        # scale factor comes from the trailing digit of ``target_type``
        last_char = target_type[-1]
        if last_char.isdigit():
            self.scale = int(last_char)
        else:
            self.scale = 4

        height, width = patch_size
        self.target_patch_size = patch_size
        self.input_patch_size = (height // self.scale, width // self.scale)

        if transform is not None:
            self.transform = transform
        else:
            self.transform = lambda x: x
コード例 #4
0
    def _download(self):
        """Download and extract the dataset archive if it is not present.

        Nothing happens when ``self._check_exists()`` returns a truthy
        value. Otherwise ``self.raw_folder`` and ``self.processed_folder``
        are created, and the archive whose URL is ``self.resources[0]``
        and whose MD5 is ``self.resources[1]`` is passed to
        ``download_and_extract_archive`` with ``self.raw_folder`` as the
        download directory and ``remove_finished=True``.
        """
        if not self._check_exists():
            for folder_attr in ("raw_folder", "processed_folder"):
                os.makedirs(getattr(self, folder_attr), exist_ok=True)

            archive_url = self.resources[0]
            archive_md5 = self.resources[1]

            download_and_extract_archive(
                url=archive_url,
                download_root=self.raw_folder,
                filename=self.filename,
                md5=archive_md5,
                remove_finished=True,
            )
コード例 #5
0
ファイル: mnist.py プロジェクト: Podidiving/catalyst
    def download(self):
        """Download the MNIST data if it doesn't exist in processed_folder.

        Returns immediately when ``self._check_exists()`` is truthy.
        Otherwise every ``(url, md5)`` pair in ``self.resources`` is
        downloaded into ``self.raw_folder``. The raw image and label files
        are then read and stored as ``(images, labels)`` tuples with
        ``torch.save`` under ``self.processed_folder``, using
        ``self.training_file`` and ``self.test_file`` as file names.
        """
        if self._check_exists():
            return

        for folder_attr in ("raw_folder", "processed_folder"):
            os.makedirs(getattr(self, folder_attr), exist_ok=True)

        # download files; each archive is named after its last URL segment
        for archive_url, checksum in self.resources:
            _, _, archive_name = archive_url.rpartition("/")
            download_and_extract_archive(
                archive_url,
                download_root=self.raw_folder,
                filename=archive_name,
                md5=checksum,
            )

        # process and save as torch files
        print("Processing...")

        def read_pair(images_name, labels_name):
            # images are read before labels for each split
            images = _read_image_file(
                os.path.join(self.raw_folder, images_name))
            labels = _read_label_file(
                os.path.join(self.raw_folder, labels_name))
            return images, labels

        # both splits are fully read before anything is written
        training_set = read_pair(
            "train-images-idx3-ubyte", "train-labels-idx1-ubyte")
        test_set = read_pair(
            "t10k-images-idx3-ubyte", "t10k-labels-idx1-ubyte")

        outputs = (
            (self.training_file, training_set),
            (self.test_file, test_set),
        )
        for target_name, payload in outputs:
            target_path = os.path.join(self.processed_folder, target_name)
            with open(target_path, "wb") as f:
                torch.save(payload, f)

        print("Done!")