Example #1
    def download(self) -> None:
        """Download the MNIST data if it doesn't exist in processed_folder already."""

        if self._check_exists():
            return

        os.makedirs(self.raw_folder, exist_ok=True)
        os.makedirs(self.processed_folder, exist_ok=True)

        # download files
        for url, md5 in self.resources:
            filename = url.rpartition('/')[2]
            download_and_extract_archive(url,
                                         download_root=self.raw_folder,
                                         filename=filename,
                                         md5=md5)

        # process and save as torch files
        print('Processing...')

        # NOTE: loads pre-processed tensors from hardcoded local paths
        # rather than the MNIST files downloaded above.
        training_set = (torch.load('/home/saeid/data/X_train.pt'),
                        torch.load('/home/saeid/data/y_train.pt'))
        test_set = (torch.load('/home/saeid/data/X_test.pt'),
                    torch.load('/home/saeid/data/y_test.pt'))
        with open(os.path.join(self.processed_folder, self.training_file),
                  'wb') as f:
            torch.save(training_set, f)
        with open(os.path.join(self.processed_folder, self.test_file),
                  'wb') as f:
            torch.save(test_set, f)

        print('Done!')
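
All of these examples build on torchvision's download_and_extract_archive helper. A minimal standalone sketch of its keyword signature (the URL and paths below are placeholders, not taken from any example):

from torchvision.datasets.utils import download_and_extract_archive

download_and_extract_archive(
    "https://example.com/data/archive.tar.gz",  # hypothetical URL
    download_root="./data/raw",     # where the archive file itself is saved
    extract_root="./data",          # where the contents are extracted (defaults to download_root)
    filename="archive.tar.gz",      # local name for the downloaded archive
    md5=None,                       # pass a checksum string to verify the download
    remove_finished=False,          # set True to delete the archive after extraction
)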
Example #2
 def __init__(self, years, symmetric=False):
     self.url_dir = 'http://www.bic.mni.mcgill.ca/~vfonov/nihpd/obj1/'
     sym_string = 'sym' if symmetric else 'asym'
     if not isinstance(years, tuple) or years not in SUPPORTED_YEARS:
         message = f'Years must be a tuple in {SUPPORTED_YEARS}'
         raise ValueError(message)
     a, b = years
     file_id = f'{sym_string}_{format_age(a)}-{format_age(b)}'
     self.name = f'nihpd_{file_id}_nifti'
     self.filename = f'{self.name}.zip'
     self.url = urllib.parse.urljoin(self.url_dir, self.filename)
     download_root = get_torchio_cache_dir() / self.name
     if download_root.is_dir():
         print(f'Using cache found in {download_root}')
     else:
         download_and_extract_archive(
             self.url,
             download_root=download_root,
             filename=self.filename,
         )
     super().__init__(
         t1=Image(download_root / f'nihpd_{file_id}_t1w.nii'),
         t2=Image(download_root / f'nihpd_{file_id}_t2w.nii'),
         pd=Image(download_root / f'nihpd_{file_id}_pdw.nii'),
         mask=Image(download_root / f'nihpd_{file_id}_pdw.nii', type=LABEL),
     )
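
A hedged usage sketch for the constructor above, assuming the class is torchio's Pediatric and that (4.5, 8.5) appears in SUPPORTED_YEARS (both are assumptions):

subject = Pediatric(years=(4.5, 8.5))  # downloads on first use, reuses the cache afterwards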
Example #3
 def download(self):
     if self._check_integrity():
         print("Files already downloaded and verified")
         return
     download_and_extract_archive(self.url, self.root)
     os.rename(os.path.join(self.root, self.basedir),
               os.path.join(self.root, self.name))
Example #4
    def download(self, cleanup):
        if self._check_integrity():
            print('Files already downloaded and verified')
            return

        download_and_extract_archive(self.images_url, os.path.join(self.root, "raw_data"), filename=self.raw_ims_file)
        download_url(self.labels_url, os.path.join(self.root, "raw_data"), self.label_file)
        download_url(self.splits_url, os.path.join(self.root, "raw_data"), self.splits_file)

        mat = scipy.io.loadmat(os.path.join(self.root, 'raw_data', self.label_file))
        labels = mat['labels'][0]
        classes = [str(x) for x in set(list(labels))]

        mat = scipy.io.loadmat(os.path.join(self.root, 'raw_data', self.splits_file))
        train = mat['trnid'][0]
        val = mat['valid'][0]
        test = mat['tstid'][0]

        def create_split(split_name, split_indices):
            os.mkdir(os.path.join(self.root, split_name))
            for label_name in classes:
                os.mkdir(os.path.join(self.root, split_name, label_name))
            for sample_idx in split_indices:
                file_name = f'image_{sample_idx:05d}.jpg'
                src = os.path.join(self.root, 'raw_data', 'jpg', file_name)
                # split indices from the .mat file are 1-based (MATLAB), hence the -1
                dest = os.path.join(self.root, split_name, str(labels[sample_idx - 1]), file_name)
                shutil.copy(src, dest)

        create_split('train', train)
        create_split('val', val)
        create_split('test', test)

        if cleanup:
            shutil.rmtree(os.path.join(self.root, "raw_data"))
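
The resulting train/val/test directories follow the one-subdirectory-per-class layout that torchvision's ImageFolder expects, so each split can be loaded directly afterwards. A sketch, where root stands for self.root and the transform is a placeholder:

import os
from torchvision.datasets import ImageFolder
from torchvision import transforms

train_ds = ImageFolder(os.path.join(root, 'train'),
                       transform=transforms.ToTensor())  # hypothetical transform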
Example #5
def download(dataroot: str):
    DOWNLOAD_URL = 'http://groups.csail.mit.edu/vision/LabelMe/NewImages/indoorCVPR_09.tar'
    with tempfile.TemporaryDirectory() as tempdir:
        download_and_extract_archive(DOWNLOAD_URL,
                                     tempdir,
                                     extract_root=dataroot,
                                     remove_finished=True)
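
Downloading into a TemporaryDirectory is a deliberate choice here: the .tar archive lands in tempdir, only the extracted contents reach dataroot via extract_root, and the temporary directory disappears when the with-block exits. A hypothetical call (the path is an assumption):

download('./data/indoor_scenes')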
Example #6
 def download(self):
     if not self._check_integrity():
         download_and_extract_archive(self.url, self.root)
         os.rename(
             os.path.join(self.root, self.basedir),
             os.path.join(self.root, self.name),
         )
Example #7
def download(root: str, file_name: str, archive_name: str, url_link: str):
    """ Download file from internet url link.
  :param root: (string) The directory to put downloaded files.
  :param file_name: (string) The name of the unzipped file.
  :param archive_name: (string) The name of archive(zipped file) downloaded.
  :param url_link: (string) The url link to download data.
  :return: None
  .. note::
  If `file_name` already exists under path `root`, then it is not downloaded again.
  Else `archive_name` will be downloaded from `url_link` and extracted to `file_name`.
  """
    if not os.path.exists(os.path.join(root, file_name)):
        print("Downloading {}".format(file_name))
        if os.path.exists(os.path.join(root, archive_name)):
            os.remove(os.path.join(root, archive_name))
        try:
            download_and_extract_archive(url_link,
                                         download_root=root,
                                         filename=archive_name,
                                         remove_finished=True)
        except Exception:
            print("Failed to download {} from {}".format(
                archive_name, url_link))
            print('Please check your internet connection or '
                  "reinstall DALIB with 'pip install --upgrade dalib'")
            exit(1)  # exit with a nonzero status on failure
Example #8
    def download(self: 'HPatchesSequencesStereoPairs') -> None:

        if not self._check_raw_exists():
            os.makedirs(self.raw_folder, exist_ok=True)

            tv_data.download_and_extract_archive(
                HPatchesSequencesStereoPairs.url,
                download_root=self.raw_folder,
                remove_finished=True)

        if not self._check_processed_exists():
            hpatches_folders = [
                os.path.join(self.raw_extracted_folder, seq_folder)
                for seq_folder in self._split_sequences
            ]

            sequences = [
                HPatchesPairGenerator.read_raw_folder(
                    seq_folder, self.convert_to_grayscale)
                for seq_folder in hpatches_folders
            ]

            # IMIPS downscales large images
            if self.downsample_large_images:
                for seq in sequences:
                    while np.any(np.array(seq.images[0].shape) > 1000):
                        seq.downsample_in_place()

            os.makedirs(self.processed_folder, exist_ok=True)
            with open(self.processed_file, 'wb') as pickle_file:
                pickle.dump(sequences, pickle_file)
Example #9
    def _download(self):
        if self._check_exists():
            return

        os.makedirs(self.raw_folder, exist_ok=True)
        os.makedirs(self.processed_folder, exist_ok=True)

        # download files
        for url, md5 in self.resources:
            filename = url.rpartition('/')[2]
            download_and_extract_archive(url, download_root=self.raw_folder, filename=filename,
                                         md5=md5)

        # process and save as torch files
        logging.info('Processing...')
        training_set = (read_image_file(os.path.join(self.raw_folder, 'train-images-idx3-ubyte')),
                        read_label_file(os.path.join(self.raw_folder, 'train-labels-idx1-ubyte')))
        test_set = (read_image_file(os.path.join(self.raw_folder, 't10k-images-idx3-ubyte')),
                    read_label_file(os.path.join(self.raw_folder, 't10k-labels-idx1-ubyte')))
        with open(os.path.join(self.processed_folder, 'train.pt'), 'wb') as f:
            torch.save(training_set, f)
        with open(os.path.join(self.processed_folder, 'test.pt'), 'wb') as f:
            torch.save(test_set, f)
        logging.info('Done downloading!')

        self._create_val_split()
Example #10
    def __init__(self, root, train, transform, download=True):

        self.url = "http://cs231n.stanford.edu/tiny-imagenet-200"
        self.root = root
        if download:
            if os.path.exists(f'{self.root}/tiny-imagenet-200/'):
                print('File already downloaded')
            else:
                download_and_extract_archive(self.url,
                                             root,
                                             filename="tiny-imagenet-200.zip")

        self.root = os.path.join(self.root, "tiny-imagenet-200")
        self.train = train
        self.transform = transform
        self.ids_string = np.sort(np.loadtxt(f"{self.root}/wnids.txt", "str"))
        self.ids = {
            class_string: i
            for i, class_string in enumerate(self.ids_string)
        }
        if train:
            self.paths = glob.glob(f"{self.root}/train/*/images/*")
            self.label = [self.ids[path.split("/")[-3]] for path in self.paths]
        else:
            self.val_annotations = np.loadtxt(
                f"{self.root}/val/val_annotations.txt", "str")
            self.paths = [
                f"{self.root}/val/images/{sample[0]}"
                for sample in self.val_annotations
            ]
            self.label = [
                self.ids[sample[1]] for sample in self.val_annotations
            ]
Example #11
    def __init__(self, root, split='train', task='all', download=True, **kwargs):
        if download:
            if not osp.exists(osp.join(root, "training")) or not osp.exists(osp.join(root, "evaluation")):
                download_and_extract_archive("https://lmb.informatik.uni-freiburg.de/data/freihand/FreiHAND_pub_v2.zip",
                                             download_root=root, filename="FreiHAND_pub_v2.zip", remove_finished=False,
                                             extract_root=root)

        assert split in ['train', 'test', 'all']
        self.split = split

        assert task in ['all', 'gs', 'auto', 'sample', 'hom']
        self.task = task
        if task == 'all':
            samples = self.get_samples(root, 'gs') + self.get_samples(root, 'auto') + self.get_samples(root, 'sample') + self.get_samples(root, 'hom')
        else:
            samples = self.get_samples(root, task)
        random.seed(42)
        random.shuffle(samples)
        samples_len = len(samples)
        samples_split = min(int(samples_len * 0.2), 3200)
        if self.split == 'train':
            samples = samples[samples_split:]
        elif self.split == 'test':
            samples = samples[:samples_split]

        super(FreiHand, self).__init__(root, samples, **kwargs)
Example #12
    def download(self):
        if self._check_integrity():
            print("Files already downloaded and verified")
            return
        root = self.root
        download_and_extract_archive(self.url,
                                     root,           # download_root
                                     root,           # extract_root
                                     self.filename,  # filename
                                     self.zip_md5,   # md5
                                     remove_finished=True)
        os.rename(join(root, "Graph Wavelets Demo"),
                  join(root, "minnesota-usc"))

        files2keep = self.files2keep
        files2keep.append("Datasets")
        dirs = os.listdir(join(root, self.top_dir))
        for file_or_dir in dirs:
            if file_or_dir not in files2keep:
                remove_file_or_dir(join(root, self.top_dir, file_or_dir))

        for mat_file, md5sub in self.mat_list:
            abs_path = join(root, self.top_dir, "Datasets", mat_file)
            check_integrity(abs_path, md5sub)
            move(abs_path, join(root, self.top_dir, mat_file))

        remove_file_or_dir(join(root, self.top_dir, "Datasets"))
Example #13
    def download(self, data_dir: str) -> None:
        """Download dataset

        Parameters
        ----------
        data_dir : str
            Path to base dataset directory

        Returns
        -------
        None

        """
        if not os.path.exists(data_dir):
            os.mkdir(data_dir)

        if not os.path.exists(self.root_dir):
            os.makedirs(self.root_dir)

            download_and_extract_archive(self.url,
                                         data_dir,
                                         remove_finished=True)

            # Tidy up
            for d in ['train', 'val', 'test']:
                shutil.move(src=os.path.join(data_dir,
                                             'BSR/BSDS500/data/images', d),
                            dst=self.root_dir)
                os.remove(os.path.join(self.root_dir, d, 'Thumbs.db'))

            shutil.rmtree(os.path.join(data_dir, 'BSR'))
Example #14
def download_dtd_dataset(root: str):
    """
    Download the DTD (Describable Textures) dataset archive and expand it
    in the folder provided as parameter
    """
    url = "https://www.robots.ox.ac.uk/~vgg/data/dtd/download/dtd-r1.0.1.tar.gz"
    download_and_extract_archive(url, root)
Example #15
    def prepare_data(self):
        """Download images and prepare images datasets."""

        if not self.data_path.is_dir():
            download_and_extract_archive(url=self.data_url,
                                         download_root=self.dl_path,
                                         remove_finished=True)
Example #16
    def prepare_data(self):
        """Download images and prepare images datasets."""

        # 1. Download the images
        download_and_extract_archive(url=DATA_URL,
                                     download_root=self.dl_path,
                                     remove_finished=True)

        data_path = Path(self.dl_path).joinpath('cats_and_dogs_filtered')

        # 2. Load the data + preprocessing & data augmentation
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])

        train_dataset = ImageFolder(root=data_path.joinpath('train'),
                                    transform=transforms.Compose([
                                        transforms.Resize((224, 224)),
                                        transforms.RandomHorizontalFlip(),
                                        transforms.ToTensor(),
                                        normalize,
                                    ]))

        valid_dataset = ImageFolder(root=data_path.joinpath('validation'),
                                    transform=transforms.Compose([
                                        transforms.Resize((224, 224)),
                                        transforms.ToTensor(),
                                        normalize,
                                    ]))

        self.train_dataset = train_dataset
        self.valid_dataset = valid_dataset
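
Examples #15 and #16 implement prepare_data, the PyTorch Lightning hook that runs on a single process before training, which is why download-and-extract work belongs there. Lightning's docs recommend building datasets in setup instead, since that hook runs in every process; a minimal sketch with a hypothetical module name:

import pytorch_lightning as pl
from torch.utils.data import DataLoader

class CatsDogsDataModule(pl.LightningDataModule):  # hypothetical name
    def prepare_data(self):
        ...  # download and extract only; runs on a single process

    def setup(self, stage=None):
        ...  # build self.train_dataset / self.valid_dataset; runs in every process

    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=32, shuffle=True)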
Example #17
    def load_word_embedding(embedding_name, embedding_dim: int=300) -> WordEmbedding:
        assert Sent140.WORD_EMBEDDING is None, 'the embedding can only be loaded once'
        extract_root = os.path.join(EMBEEDING_PREFIX, embedding_name)
        if embedding_name.startswith('glove'):
            target_filename = os.path.join(extract_root, embedding_name + '.{}d.txt'.format(embedding_dim))
        else:
            raise ValueError('cannot load embedding info for {}'.format(embedding_name))

        if not os.path.exists(target_filename):
            tmp_root = os.path.join(EMBEEDING_PREFIX, 'tmp')
            download_and_extract_archive(url=Sent140.WORD_EMBEDDING_URLS[embedding_name], download_root=tmp_root,
                                         extract_root=extract_root, remove_finished=True)

        # load the word vectors; GloVe is assumed by default here
        print('Using word-embedding file: {}'.format(target_filename))
        with open(target_filename, 'r') as inf:
            lines = inf.readlines()
        # in each line, the first token is the word, the rest are the vector components
        lines = [l.split() for l in lines]
        # the vocabulary
        index2word = [l[0] for l in lines]
        # the vector for each word
        emb_floats = [np.asarray([float(n) for n in l[1:]], dtype=np.float32) for l in lines]
        # reserve the last row for UNK: the embedding matrix gains one extra row,
        # while the vocabulary itself stays one entry smaller
        emb_floats.append(np.zeros([embedding_dim], dtype=np.float32))  # for unknown words
        embedding = np.stack(emb_floats, axis=0)
        word2index = {v: k for k, v in enumerate(index2word)}
        return WordEmbedding(word2index=word2index,
                             index2word=index2word,
                             embedding=embedding,
                             unk_index=len(index2word),
                             num_vocabulary=len(index2word))
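
A small lookup sketch on top of the returned WordEmbedding (the helper name is an assumption): unknown tokens fall back to the zero UNK row appended above.

def embed(we, token):
    idx = we.word2index.get(token, we.unk_index)  # OOV words map to the UNK index
    return we.embedding[idx]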
Example #18
def download(dataroot: str):
    DOWNLOAD_URL = 'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz'
    with tempfile.TemporaryDirectory() as tempdir:
        download_and_extract_archive(DOWNLOAD_URL,
                                     tempdir,
                                     extract_root=dataroot,
                                     remove_finished=True)
Example #19
    def download(self):
        """Download the MNIST data if it doesn't exist in processed_folder already."""

        if self._check_exists():
            return

        makedir_exist_ok(self.raw_folder)
        makedir_exist_ok(self.processed_folder)

        # download files
        for url in self.urls:
            filename = url.rpartition('/')[2]
            download_and_extract_archive(url, download_root=self.raw_folder, filename=filename)

        # process and save as torch files
        print('Processing...')

        training_set = (
            read_image_file(os.path.join(self.raw_folder, 'train-images-idx3-ubyte')),
            read_label_file(os.path.join(self.raw_folder, 'train-labels-idx1-ubyte'))
        )
        test_set = (
            read_image_file(os.path.join(self.raw_folder, 't10k-images-idx3-ubyte')),
            read_label_file(os.path.join(self.raw_folder, 't10k-labels-idx1-ubyte'))
        )
        with open(os.path.join(self.processed_folder, self.training_file), 'wb') as f:
            torch.save(training_set, f)
        with open(os.path.join(self.processed_folder, self.test_file), 'wb') as f:
            torch.save(test_set, f)

        print('Done!')
Example #20
    def download(self):
        if not check_integrity(self.meta_file):
            tmp_dir = tempfile.mkdtemp()

            archive_dict = self.archive_dict['devkit']
            download_and_extract_archive(archive_dict['url'],
                                         self.root,
                                         extract_root=tmp_dir,
                                         md5=archive_dict['md5'])
            devkit_folder = _splitexts(os.path.basename(
                archive_dict['url']))[0]
            meta = parse_devkit(os.path.join(tmp_dir, devkit_folder))
            self._save_meta_file(*meta)

            shutil.rmtree(tmp_dir)

        if not os.path.isdir(self.split_folder):
            archive_dict = self.archive_dict[self.split]
            download_and_extract_archive(archive_dict['url'],
                                         self.root,
                                         extract_root=self.split_folder,
                                         md5=archive_dict['md5'])

            if self.split == 'train':
                prepare_train_folder(self.split_folder)
            elif self.split == 'val':
                val_wnids = self._load_meta_file()[1]
                prepare_val_folder(self.split_folder, val_wnids)
        else:
            msg = (
                "You set download=True, but a folder '{}' already exists in "
                "the root directory. If you want to re-download or re-extract the "
                "archive, delete the folder.")
            print(msg.format(self.split))
Example #21
 def download(self):
     if self._check_integrity():
         print('Files already downloaded and verified')
         return
     utils.download_and_extract_archive(self.url, self.root,
                                        filename=self.filename,
                                        md5=self.tgz_md5)
Example #22
    def __init__(self,
                 root: str,
                 task: str,
                 download: Optional[bool] = False,
                 **kwargs):
        if download:
            for dir in self.directories.values():
                if not os.path.exists(os.path.join(root, dir)):
                    download_and_extract_archive(
                        url=
                        "https://cloud.tsinghua.edu.cn/f/e93f2e07d93243d6b57e/?dl=1",
                        download_root=os.path.join(root, 'download'),
                        filename="officecaltech.tgz",
                        remove_finished=False,
                        extract_root=root)
                    break
        else:
            list(
                map(lambda dir: check_exits(root, dir),
                    self.directories.values()))

        super(OfficeCaltech,
              self).__init__(os.path.join(root, self.directories[task]),
                             default_loader,
                             extensions=IMG_EXTENSIONS,
                             **kwargs)
        self.classes = OfficeCaltech.CLASSES
        self.class_to_idx = {
            cls: idx
            for idx, clss in enumerate(self.classes) for cls in clss
        }
Example #23
 def download(self):
     if self._check_integrity():
         print("Files already downloaded and verified")
     download_and_extract_archive(self.url,
                                  self.root,
                                  filename=self.filename,
                                  md5=None)
Example #24
 def download(self):
     url = "https://github.com/chiayewken/sutd-materials/releases/download/v0.1.0/numberbatch-en-19.08.txt.gz"
     path = self.cache_dir / Path(url).stem
     if not path.exists():
         download_and_extract_archive(url, str(self.cache_dir))
     assert path.exists()
     return path
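
Path.stem strips only the final suffix, which is why the expected path works out after the .gz archive is extracted:

from pathlib import Path
print(Path("numberbatch-en-19.08.txt.gz").stem)  # numberbatch-en-19.08.txt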
Example #25
    def __init__(self, train=True, rootDir=None, transform=None, test=False):
        super(TinyImageNet, self).__init__()
        datasetURL = "http://cs231n.stanford.edu/tiny-imagenet-200.zip"
        if rootDir is None:
            rootDir = os.path.abspath(os.path.join(os.getcwd(), "../data"))
        if not os.path.exists(os.path.join(rootDir, "tiny-imagenet-200")):
            print(f"Downloading TinyImageNet data to {rootDir}")
            download_and_extract_archive(datasetURL, rootDir)
            print("...done")
        self.rootDir = os.path.abspath(
            os.path.join(rootDir, "tiny-imagenet-200"))
        self.train = train
        self.test = test
        self.transforms = transform  # the parameter, not the torchvision.transforms module
        trainDataset = ImageFolder(os.path.join(self.rootDir, "train"),
                                   transform)
        testDataset = ImageFolder(os.path.join(self.rootDir, "test"),
                                  transform)
        validDataset = TinyImagenetVal(self.rootDir, transform)

        if not self.test:
            if self.train:
                self._dataset = trainDataset
            else:
                self._dataset = validDataset
            self.targets = self._dataset.targets
        else:
            self._dataset = testDataset
            self.targets = None
Example #26
    def download(self) -> None:
        """Download data if it doesn't exist already."""

        if self._check_exists():
            return

        os.makedirs(self.raw_folder, exist_ok=True)

        # download files
        for filename, md5 in self.resources:
            for mirror in self.mirrors:
                url = "{}{}".format(mirror, filename)
                try:
                    print("Downloading {}".format(url))
                    download_and_extract_archive(
                        url, download_root=self.raw_folder,
                        filename=filename,
                        md5=md5
                    )
                except URLError as error:
                    print(
                        "Failed to download (trying next):\n{}".format(error)
                    )
                    continue
                finally:
                    print()
                break
            else:
                raise RuntimeError("Error downloading {}".format(filename))
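
The mirror loop above relies on Python's for/else: the else branch runs only if the loop completes without hitting break, i.e. only when every mirror failed. The same retry pattern in isolation, with hypothetical names:

for mirror in ["https://mirror-a.example.com/", "https://mirror-b.example.com/"]:
    try:
        fetch(mirror)  # hypothetical download call
    except OSError:
        continue       # this mirror failed; try the next one
    break              # success: skip the else clause
else:
    raise RuntimeError("all mirrors failed")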
Example #27
def download_mnist(data_dir):
    """
    Taken from torchvision.datasets.mnist
    Dwonloads Mnist  from the official site
    reshapes themas images, normalizes them and saves them as a tensor
    """
    raw_folder = os.path.join(data_dir, 'raw')
    if not os.path.exists(raw_folder):
        os.makedirs(raw_folder, exist_ok=True)

        # download files
        train_imgs_url = "http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz", "f68b3c2dcbeaaa9fbdd348bbdeb94873"
        test_imgs_url = "http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz", "9fb629c4189551a2d022fa330f9573f3"
        for url, md5 in [train_imgs_url, test_imgs_url]:
            filename = url.rpartition('/')[2]
            download_and_extract_archive(url, download_root=raw_folder, filename=filename, md5=md5)

    if not os.path.exists(os.path.join(data_dir, 'train_data.pt')):

        # process and save as torch files
        print('Processing...')

        training_set = read_image_file(os.path.join(raw_folder, 'train-images-idx3-ubyte'))
        test_set = read_image_file(os.path.join(raw_folder, 't10k-images-idx3-ubyte'))

        # preprocess: reshape and normalize from [0,255] to [-1,1]
        training_set = training_set.reshape(-1, 1, MNIST_WORKING_DIM, MNIST_WORKING_DIM) / 127.5 - 1
        test_set = test_set.reshape(-1, 1, MNIST_WORKING_DIM, MNIST_WORKING_DIM) / 127.5 - 1

        with open(os.path.join(data_dir, 'train_data.pt'), 'wb') as f:
            torch.save(training_set, f)
        with open(os.path.join(data_dir, 'test_data.pt'), 'wb') as f:
            torch.save(test_set, f)

    print('Done!')
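
A quick check of the normalization above: dividing by 127.5 and subtracting 1 maps pixel values from [0, 255] onto [-1, 1].

import torch
x = torch.tensor([0.0, 127.5, 255.0])
print(x / 127.5 - 1)  # tensor([-1., 0., 1.])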
Example #28
    def download(self):

        if os.path.exists(self.sonycust_folder):
            return

        os.makedirs(self.sonycust_folder, exist_ok=True)

        # Download files
        print("Downloading files")
        for url, filename, md5 in self.resources[4:]:
            filename = url.rpartition('/')[2]  # derive the name from the URL, overriding the tuple value
            download_url(url,
                         root=self.sonycust_folder,
                         filename=filename,
                         md5=md5)

        for url, filename, md5 in self.resources[0:4]:
            download_and_extract_archive(url,
                                         download_root=self.sonycust_folder,
                                         filename=filename + ".tar.gz",
                                         md5=md5,
                                         remove_finished=True)

        # Moving evaluation files to audio directory
        print("Moving files from eval to audio")
        for eval_num in range(3):
            for f in os.listdir(self.file_path_dict['audio-eval-' +
                                                    str(eval_num)]):
                if f.endswith(".wav"):
                    shutil.move(
                        os.path.join(
                            self.file_path_dict['audio-eval-' + str(eval_num)],
                            f), self.file_path_dict['audio'])
Example #29
 def download(self):
     if self._check_integrity():
         return
     download_and_extract_archive(self.url,
                                  self.root,
                                  filename=self.filename,
                                  md5=self.tgz_md5)
Example #30
    def __init__(self,
                 root,
                 split,
                 bands=None,
                 transform=None,
                 target_transform=None,
                 download=False,
                 use_new_labels=True):
        self.root = Path(root)
        self.split = split
        self.bands = bands if bands is not None else RGB_BANDS
        self.transform = transform
        self.target_transform = target_transform
        self.use_new_labels = use_new_labels

        if download:
            download_and_extract_archive(self.url, self.root)
            download_url(self.list_file[self.split], self.root,
                         f'{self.split}.txt')
            for url in self.bad_patches:
                download_url(url, self.root)

        bad_patches = set()
        for url in self.bad_patches:
            filename = Path(url).name
            with open(self.root / filename) as f:
                bad_patches.update(f.read().splitlines())

        self.samples = []
        with open(self.root / f'{self.split}.txt') as f:
            for patch_id in f.read().splitlines():
                if patch_id not in bad_patches:
                    self.samples.append(self.root / self.subdir / patch_id)
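
A hedged usage sketch for the constructor above (the class name BigEarthNet and the paths are assumptions):

ds = BigEarthNet(root='./data/bigearthnet', split='train', download=True)
print(len(ds.samples))  # patches remaining after the bad-patch filtering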