def _check_integrity(self) -> bool:
    return all(
        check_integrity(self._get_path(filename), md5)
        for filename, md5 in self.checksum.items())
def __init__(self, data_root, split_mode, download_code=None, download=False,
             extract=False):
    self.data_root = path.expanduser(data_root)
    self.split_mode = split_mode
    self._zip_path = path.join(self.data_root, 'DRIVE', 'DRIVE.zip')
    self._zip_md5 = 'a91f25272507b1f53132d03a69030de8'

    if self.split_mode == 'train':
        self._image_dir = path.join(self.data_root, 'DRIVE', 'training', 'images')
        self._target_dir = path.join(self.data_root, 'DRIVE', 'training', '1st_manual')
        self._mask_dir = path.join(self.data_root, 'DRIVE', 'training', 'mask')
        self.sample_keys = ['image', 'target', 'mask']
    elif self.split_mode in ('valid', 'test'):
        self._image_dir = path.join(self.data_root, 'DRIVE', 'test', 'images')
        self._target_dir = path.join(self.data_root, 'DRIVE', 'test', '1st_manual')
        self._mask_dir = path.join(self.data_root, 'DRIVE', 'test', 'mask')
        self._target_aux_dir = path.join(self.data_root, 'DRIVE', 'test', '2nd_manual')
        self.sample_keys = ['image', 'target', 'mask', 'target_aux']
    else:
        LOGGER.error('Invalid split mode: %s', self.split_mode)
        raise NotImplementedError('Invalid split mode: {}'.format(self.split_mode))

    if download:
        self._download(download_code)
    elif extract:
        self._extract()

    if not check_integrity(self._zip_path, self._zip_md5):
        LOGGER.error('DRIVE dataset not found or corrupted')
        raise RuntimeError('DRIVE dataset not found or corrupted')

    self._image_paths = sorted(
        list_files(root=self._image_dir, suffix=('.tif', '.TIF'), prefix=True))
    self._target_paths = sorted(
        list_files(root=self._target_dir, suffix=('.gif', '.GIF'), prefix=True))
    self._mask_paths = sorted(
        list_files(root=self._mask_dir, suffix=('.gif', '.GIF'), prefix=True))
    if self.split_mode in ('valid', 'test'):
        self._target_aux_paths = sorted(
            list_files(root=self._target_aux_dir, suffix=('.gif', '.GIF'), prefix=True))
        assert len(self._image_paths) == len(self._target_aux_paths), \
            'DRIVE dataset corrupted'
    assert len(self._image_paths) == len(self._target_paths), \
        'DRIVE dataset corrupted'
    assert len(self._image_paths) == len(self._mask_paths), \
        'DRIVE dataset corrupted'
    LOGGER.debug('Retrieved all %d samples for DRIVE dataset',
                 len(self._image_paths))

    # for compatibility with other datasets
    self.indices = list(range(len(self._image_paths)))
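# --- Illustrative usage sketch (not part of the original snippet) ---
# A minimal example of how the constructor above might be called. The class name
# `DRIVEDataset` is a hypothetical placeholder (the snippet does not show it), and
# it assumes the DRIVE archive already sits at <data_root>/DRIVE/DRIVE.zip or can
# be fetched with a valid download code.
#
# train_set = DRIVEDataset(data_root='~/datasets', split_mode='train',
#                          download=True, download_code='<code>')
# test_set = DRIVEDataset(data_root='~/datasets', split_mode='test', extract=True)
# print(len(test_set.indices), test_set.sample_keys)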
def _check_integrity(self):
    root = self.root
    md5 = self.split_list[self.split][2]
    fpath = os.path.join(root, self.filename)
    return check_integrity(fpath, md5)
def _check_exists(self) -> bool:
    return all(
        check_integrity(os.path.join(
            self.raw_folder, os.path.splitext(os.path.basename(url))[0]))
        for url, _ in self.resources)
def __init__(
        self,
        root: str,
        train: bool = None,
        data_type: str = 'event',
        frames_number: int = None,
        split_by: str = None,
        duration: int = None,
        padding_frame: bool = True,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
) -> None:
    '''
    :param root: root path of the dataset
    :type root: str
    :param train: whether to use the train set. Set ``True`` or ``False`` for datasets that provide a train/test division, e.g., the DVS128 Gesture dataset. If the dataset does not provide a train/test division, e.g., CIFAR10-DVS, please set ``None`` and use the :class:`~split_to_train_test_set` function to get the train/test set
    :type train: bool
    :param data_type: `event` or `frame`
    :type data_type: str
    :param frames_number: the integrated frame number
    :type frames_number: int
    :param split_by: `time` or `number`
    :type split_by: str
    :param duration: the time duration of each frame
    :type duration: int
    :param padding_frame: whether to pad the number of frames to the maximum number of frames
    :type padding_frame: bool
    :param transform: a function/transform that takes in a sample and returns a transformed version. E.g., ``transforms.RandomCrop`` for images.
    :type transform: callable
    :param target_transform: a function/transform that takes in the target and transforms it.
    :type target_transform: callable

    The base class for neuromorphic datasets. Users can define a new dataset by inheriting this class and implementing all abstract methods. Users can refer to :class:`spikingjelly.datasets.dvs128_gesture.DVS128Gesture`.

    If ``data_type == 'event'``, the sample in this dataset is a dict whose keys are ``['t', 'x', 'y', 'p']`` and values are ``numpy.ndarray``.

    If ``data_type == 'frame'`` and ``frames_number`` is not ``None``, events will be integrated to frames with a fixed number of frames. ``split_by`` defines how to split events. See :class:`cal_fixed_frames_number_segment_index` for more details.

    If ``data_type == 'frame'``, ``frames_number`` is ``None``, and ``duration`` is not ``None``, events will be integrated to frames with a fixed time duration. If ``padding_frame`` is ``True``, each sample will be padded to the same number of frames (length), which is the maximum number of frames over all samples.
    '''
    events_np_root = os.path.join(root, 'events_np')
    if not os.path.exists(events_np_root):
        download_root = os.path.join(root, 'download')
        if os.path.exists(download_root):
            print(f'The [{download_root}] directory for saving downloaded files already exists, check files...')
            # check files
            resource_list = self.resource_url_md5()
            for i in range(resource_list.__len__()):
                file_name, url, md5 = resource_list[i]
                fpath = os.path.join(download_root, file_name)
                if not utils.check_integrity(fpath=fpath, md5=md5):
                    print(f'The file [{fpath}] does not exist or is corrupted.')
                    if os.path.exists(fpath):
                        # If file is corrupted, we will remove it.
                        os.remove(fpath)
                        print(f'Remove [{fpath}]')
                    if self.downloadable():
                        # If file does not exist, we will download it.
                        print(f'Download [{file_name}] from [{url}] to [{download_root}]')
                        utils.download_url(url=url, root=download_root, filename=file_name, md5=md5)
                    else:
                        raise NotImplementedError(
                            f'This dataset can not be downloaded by SpikingJelly, please download [{file_name}] from [{url}] manually and put files at {download_root}.')
        else:
            os.mkdir(download_root)
            print(f'Mkdir [{download_root}] to save downloaded files.')
            resource_list = self.resource_url_md5()
            if self.downloadable():
                # download and extract file
                for i in range(resource_list.__len__()):
                    file_name, url, md5 = resource_list[i]
                    print(f'Download [{file_name}] from [{url}] to [{download_root}]')
                    utils.download_url(url=url, root=download_root, filename=file_name, md5=md5)
            else:
                raise NotImplementedError(f'This dataset can not be downloaded by SpikingJelly, '
                                          f'please download files manually and put files at [{download_root}]. '
                                          f'The resources file_name, url, and md5 are: \n{resource_list}')

        # We have downloaded files and checked files. Now, let us extract the files
        extract_root = os.path.join(root, 'extract')
        if os.path.exists(extract_root):
            print(f'The directory [{extract_root}] for saving extracted files already exists.\n'
                  f'SpikingJelly will not check the data integrity of extracted files.\n'
                  f'If extracted files are not integrated, please delete [{extract_root}] manually, '
                  f'then SpikingJelly will re-extract files from [{download_root}].')
            # shutil.rmtree(extract_root)
            # print(f'Delete [{extract_root}].')
        else:
            os.mkdir(extract_root)
            print(f'Mkdir [{extract_root}].')
            self.extract_downloaded_files(download_root, extract_root)

        # Now let us convert the origin binary files to npz files
        os.mkdir(events_np_root)
        print(f'Mkdir [{events_np_root}].')
        print(f'Start to convert the origin data from [{extract_root}] to [{events_np_root}] in np.ndarray format.')
        self.create_events_np_files(extract_root, events_np_root)

    H, W = self.get_H_W()

    if data_type == 'event':
        _root = events_np_root
        _loader = np.load
        _transform = transform
        _target_transform = target_transform

    elif data_type == 'frame':
        if frames_number is not None:
            assert frames_number > 0 and isinstance(frames_number, int)
            assert split_by == 'time' or split_by == 'number'
            frames_np_root = os.path.join(root, f'frames_number_{frames_number}_split_by_{split_by}')
            if os.path.exists(frames_np_root):
                print(f'The directory [{frames_np_root}] already exists.')
            else:
                os.mkdir(frames_np_root)
                print(f'Mkdir [{frames_np_root}].')

                # create the same directory structure
                create_same_directory_structure(events_np_root, frames_np_root)

                # use multi-thread to accelerate
                t_ckp = time.time()
                with ThreadPoolExecutor(max_workers=min(multiprocessing.cpu_count(), 64)) as tpe:
                    print(f'Start ThreadPoolExecutor with max workers = [{tpe._max_workers}].')
                    for e_root, e_dirs, e_files in os.walk(events_np_root):
                        if e_files.__len__() > 0:
                            output_dir = os.path.join(frames_np_root, os.path.relpath(e_root, events_np_root))
                            for e_file in e_files:
                                events_np_file = os.path.join(e_root, e_file)
                                print(f'Start to integrate [{events_np_file}] to frames and save to [{output_dir}].')
                                tpe.submit(integrate_events_file_to_frames_file_by_fixed_frames_number,
                                           events_np_file, output_dir, split_by, frames_number, H, W, True)
                print(f'Used time = [{round(time.time() - t_ckp, 2)}s].')

            _root = frames_np_root
            _loader = load_npz_frames
            _transform = transform
            _target_transform = target_transform

        elif duration is not None:
            self.max_frames_number = 0
            assert duration > 0 and isinstance(duration, int)
            frames_np_root = os.path.join(root, f'duration_{duration}')
            if os.path.exists(frames_np_root):
                print(f'The directory [{frames_np_root}] already exists.')
                fn_name = os.path.join(frames_np_root, 'max_frames_number.npy')
                self.max_frames_number = np.load(fn_name).item()
                print(f'max_frames_number = [{self.max_frames_number}].')
            else:
                os.mkdir(frames_np_root)
                print(f'Mkdir [{frames_np_root}].')

                # create the same directory structure
                create_same_directory_structure(events_np_root, frames_np_root)

                # use multi-thread to accelerate
                t_ckp = time.time()
                future_list = []
                with ThreadPoolExecutor(max_workers=min(multiprocessing.cpu_count(), 64)) as tpe:
                    print(f'Start ThreadPoolExecutor with max workers = [{tpe._max_workers}].')
                    for e_root, e_dirs, e_files in os.walk(events_np_root):
                        if e_files.__len__() > 0:
                            output_dir = os.path.join(frames_np_root, os.path.relpath(e_root, events_np_root))
                            for e_file in e_files:
                                events_np_file = os.path.join(e_root, e_file)
                                print(f'Start to integrate [{events_np_file}] to frames and save to [{output_dir}].')
                                future_list.append(tpe.submit(integrate_events_file_to_frames_file_by_fixed_duration,
                                                              events_np_file, output_dir, duration, H, W, True))
                for future in future_list:
                    self.max_frames_number = max(self.max_frames_number, future.result())

                # save the max_frames_number in frames_np_root
                fn_name = os.path.join(frames_np_root, 'max_frames_number.npy')
                np.save(fn_name, self.max_frames_number)
                print(f'Save max_frames_number to [{fn_name}].')
                print(f'Used time = [{round(time.time() - t_ckp, 2)}s].')

            if padding_frame:
                if transform is None:
                    transform_with_padding = PadFrames(self.max_frames_number)
                else:
                    transform_with_padding = transforms.Compose(
                        [PadFrames(self.max_frames_number), transform])
                _root = frames_np_root
                _loader = load_npz_frames
                _transform = transform_with_padding
                _target_transform = target_transform
            else:
                _root = frames_np_root
                _loader = load_npz_frames
                _transform = transform
                _target_transform = target_transform
        else:
            raise ValueError('frames_number and duration can not both be None.')

    if train is not None:
        if train:
            _root = os.path.join(_root, 'train')
        else:
            _root = os.path.join(_root, 'test')

    super().__init__(root=_root, loader=_loader, extensions='.npz',
                     transform=_transform, target_transform=_target_transform)
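# --- Hedged usage sketch (illustration only, not part of the original class) ---
# The docstring above points to spikingjelly.datasets.dvs128_gesture.DVS128Gesture
# as a concrete subclass of this base class. Assuming that subclass forwards these
# constructor arguments and that the raw data is (or can be downloaded) under
# `root_dir` (a placeholder path), the three modes discussed above could be
# exercised roughly like this; the duration value is an arbitrary placeholder in
# the dataset's native timestamp unit.
#
# from spikingjelly.datasets.dvs128_gesture import DVS128Gesture
# root_dir = '/path/to/DVS128Gesture'
# event_set = DVS128Gesture(root_dir, train=True, data_type='event')
# frame_set = DVS128Gesture(root_dir, train=True, data_type='frame',
#                           frames_number=20, split_by='number')
# duration_set = DVS128Gesture(root_dir, train=True, data_type='frame',
#                              duration=1000000, padding_frame=True)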
def _check_integrity(self):
    for filename, md5 in self.filelist:
        fpath = os.path.join(self.root, self.basedir, filename)
        if not check_integrity(fpath, md5):
            return False
    return True
def _load_meta_file(self):
    if check_integrity(self.meta_file):
        return torch.load(self.meta_file)
    else:
        raise RuntimeError("Meta file not found or corrupted.",
                           "You can use download=True to create it.")
def _check_integrity(self):
    root = self.root
    filename = self.filename
    fpath = os.path.join(root, filename)
    return check_integrity(fpath)
def _check_integrity(root) -> bool:
    zip_filename = 'all_runs'
    if not check_integrity(join(root, zip_filename + '.zip'),
                           zips_md5[zip_filename]):
        return False
    return True
def download(root):
    if not check_integrity(os.path.join(root, "EuroSAT.zip")):
        download_and_extract_archive(URL, root, md5=MD5)
def download(self):
    import tarfile
    import pickle
    import shutil

    if self._check_integrity_data():
        return

    gz_filename = '{0}.tar.gz'.format(self.gz_folder)
    download_url(self.download_url, self.root, filename=gz_filename,
                 md5=self.gz_md5)
    with tarfile.open(os.path.join(self.root, gz_filename), 'r:gz') as tar:
        tar.extractall(path=self.root)

    train_filename = os.path.join(self.root, self.gz_folder, 'train')
    check_integrity(train_filename, self.files_md5['train'])
    with open(train_filename, 'rb') as f:
        data = pickle.load(f, encoding='bytes')
        images = data[b'data']
        fine_labels = data[b'fine_labels']
        coarse_labels = data[b'coarse_labels']

    test_filename = os.path.join(self.root, self.gz_folder, 'test')
    check_integrity(test_filename, self.files_md5['test'])
    with open(test_filename, 'rb') as f:
        data = pickle.load(f, encoding='bytes')
        images = np.concatenate((images, data[b'data']), axis=0)
        fine_labels = np.concatenate((fine_labels, data[b'fine_labels']), axis=0)
        coarse_labels = np.concatenate((coarse_labels, data[b'coarse_labels']), axis=0)

    images = images.reshape((-1, 3, 32, 32))
    images = images.transpose((0, 2, 3, 1))

    meta_filename = os.path.join(self.root, self.gz_folder, 'meta')
    check_integrity(meta_filename, self.files_md5['meta'])
    with open(meta_filename, 'rb') as f:
        data = pickle.load(f, encoding='latin1')
        fine_label_names = data['fine_label_names']
        coarse_label_names = data['coarse_label_names']

    filename = os.path.join(self.root, self.filename)
    fine_names = dict()
    with h5py.File(filename, 'w') as f:
        for i, coarse_name in enumerate(coarse_label_names):
            group = f.create_group(coarse_name)
            fine_indices = np.unique(fine_labels[coarse_labels == i])
            for j in fine_indices:
                dataset = group.create_dataset(fine_label_names[j],
                                               data=images[fine_labels == j])
            fine_names[coarse_name] = [fine_label_names[j] for j in fine_indices]

    filename_fine_names = os.path.join(self.root, self.filename_fine_names)
    with open(filename_fine_names, 'w') as f:
        json.dump(fine_names, f)

    gz_folder = os.path.join(self.root, self.gz_folder)
    if os.path.isdir(gz_folder):
        shutil.rmtree(gz_folder)
    if os.path.isfile('{0}.tar.gz'.format(gz_folder)):
        os.remove('{0}.tar.gz'.format(gz_folder))
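# --- Hedged read-back sketch (illustration only, not part of the original method) ---
# The download() above writes one HDF5 group per coarse label, one dataset per fine
# label, plus a JSON mapping of coarse -> fine label names. Assuming placeholder
# paths for the two files it produced, the resulting layout could be inspected
# like this:
import json
import h5py

data_path = '/path/to/output.hdf5'            # placeholder for self.filename
fine_names_path = '/path/to/fine_names.json'  # placeholder for self.filename_fine_names

with open(fine_names_path, 'r') as f:
    fine_names = json.load(f)                 # {coarse_label: [fine_label, ...]}

with h5py.File(data_path, 'r') as f:
    for coarse_name, fine_list in fine_names.items():
        for fine_name in fine_list:
            images = f[coarse_name][fine_name]  # per-class image array, shape (N, 32, 32, 3)
            print(coarse_name, fine_name, images.shape)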
def _check_integrity(self) -> bool:
    return check_integrity(
        os.path.join(self.root, self.dataset_name + '.tar.gz'),
        self.zip_md5)
def _load_meta_file(root):
    meta_file = os.path.join(root, 'meta.bin')
    if check_integrity(meta_file):
        return torch.load(meta_file)
    raise RuntimeError("Meta file not found or corrupted.",
                       "You can use download=True to create it.")
def __init__(
        self,
        root: str,
        train: bool = None,
        data_type: str = 'event',
        frames_number: int = None,
        split_by: str = None,
        duration: int = None,
        custom_integrate_function: Callable = None,
        custom_integrated_frames_dir_name: str = None,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
) -> None:
    '''
    :param root: root path of the dataset
    :type root: str
    :param train: whether to use the train set. Set ``True`` or ``False`` for datasets that provide a train/test division, e.g., the DVS128 Gesture dataset. If the dataset does not provide a train/test division, e.g., CIFAR10-DVS, please set ``None`` and use the :class:`~split_to_train_test_set` function to get the train/test set
    :type train: bool
    :param data_type: `event` or `frame`
    :type data_type: str
    :param frames_number: the integrated frame number
    :type frames_number: int
    :param split_by: `time` or `number`
    :type split_by: str
    :param duration: the time duration of each frame
    :type duration: int
    :param custom_integrate_function: a user-defined function whose inputs are ``events, H, W``. ``events`` is a dict whose keys are ``['t', 'x', 'y', 'p']`` and values are ``numpy.ndarray``; ``H`` is the height of the data and ``W`` is the width of the data. For example, H=128 and W=128 for the DVS128 Gesture dataset. The user should define how to integrate events to frames, and return frames.
    :type custom_integrate_function: Callable
    :param custom_integrated_frames_dir_name: the name of the directory for saving frames integrated by ``custom_integrate_function``. If ``custom_integrated_frames_dir_name`` is ``None``, it will be set to ``custom_integrate_function.__name__``
    :type custom_integrated_frames_dir_name: str or None
    :param transform: a function/transform that takes in a sample and returns a transformed version. E.g., ``transforms.RandomCrop`` for images.
    :type transform: callable
    :param target_transform: a function/transform that takes in the target and transforms it.
    :type target_transform: callable

    The base class for neuromorphic datasets. Users can define a new dataset by inheriting this class and implementing all abstract methods. Users can refer to :class:`spikingjelly.datasets.dvs128_gesture.DVS128Gesture`.

    If ``data_type == 'event'``, the sample in this dataset is a dict whose keys are ``['t', 'x', 'y', 'p']`` and values are ``numpy.ndarray``.

    If ``data_type == 'frame'`` and ``frames_number`` is not ``None``, events will be integrated to frames with a fixed number of frames. ``split_by`` defines how to split events. See :class:`cal_fixed_frames_number_segment_index` for more details.

    If ``data_type == 'frame'``, ``frames_number`` is ``None``, and ``duration`` is not ``None``, events will be integrated to frames with a fixed time duration.

    If ``data_type == 'frame'``, ``frames_number`` is ``None``, ``duration`` is ``None``, and ``custom_integrate_function`` is not ``None``, events will be integrated by the user-defined function and saved to the ``custom_integrated_frames_dir_name`` directory in the ``root`` directory. Here is an example from SpikingJelly's tutorials:

    .. code-block:: python

        from spikingjelly.datasets.dvs128_gesture import DVS128Gesture
        from typing import Dict
        import numpy as np
        import spikingjelly.datasets as sjds

        def integrate_events_to_2_frames_randomly(events: Dict, H: int, W: int):
            index_split = np.random.randint(low=0, high=events['t'].__len__())
            frames = np.zeros([2, 2, H, W])
            t, x, y, p = (events[key] for key in ('t', 'x', 'y', 'p'))
            frames[0] = sjds.integrate_events_segment_to_frame(x, y, p, H, W, 0, index_split)
            frames[1] = sjds.integrate_events_segment_to_frame(x, y, p, H, W, index_split, events['t'].__len__())
            return frames

        root_dir = 'D:/datasets/DVS128Gesture'
        train_set = DVS128Gesture(root_dir, train=True, data_type='frame',
                                  custom_integrate_function=integrate_events_to_2_frames_randomly)

        from spikingjelly.datasets import play_frame
        frame, label = train_set[500]
        play_frame(frame)
    '''
    events_np_root = os.path.join(root, 'events_np')
    if not os.path.exists(events_np_root):
        download_root = os.path.join(root, 'download')
        if os.path.exists(download_root):
            print(f'The [{download_root}] directory for saving downloaded files already exists, check files...')
            # check files
            resource_list = self.resource_url_md5()
            for i in range(resource_list.__len__()):
                file_name, url, md5 = resource_list[i]
                fpath = os.path.join(download_root, file_name)
                if not utils.check_integrity(fpath=fpath, md5=md5):
                    print(f'The file [{fpath}] does not exist or is corrupted.')
                    if os.path.exists(fpath):
                        # If file is corrupted, we will remove it.
                        os.remove(fpath)
                        print(f'Remove [{fpath}]')
                    if self.downloadable():
                        # If file does not exist, we will download it.
                        print(f'Download [{file_name}] from [{url}] to [{download_root}]')
                        utils.download_url(url=url, root=download_root, filename=file_name, md5=md5)
                    else:
                        raise NotImplementedError(
                            f'This dataset can not be downloaded by SpikingJelly, please download [{file_name}] from [{url}] manually and put files at {download_root}.')
        else:
            os.mkdir(download_root)
            print(f'Mkdir [{download_root}] to save downloaded files.')
            resource_list = self.resource_url_md5()
            if self.downloadable():
                # download and extract file
                for i in range(resource_list.__len__()):
                    file_name, url, md5 = resource_list[i]
                    print(f'Download [{file_name}] from [{url}] to [{download_root}]')
                    utils.download_url(url=url, root=download_root, filename=file_name, md5=md5)
            else:
                raise NotImplementedError(f'This dataset can not be downloaded by SpikingJelly, '
                                          f'please download files manually and put files at [{download_root}]. '
                                          f'The resources file_name, url, and md5 are: \n{resource_list}')

        # We have downloaded files and checked files. Now, let us extract the files
        extract_root = os.path.join(root, 'extract')
        if os.path.exists(extract_root):
            print(f'The directory [{extract_root}] for saving extracted files already exists.\n'
                  f'SpikingJelly will not check the data integrity of extracted files.\n'
                  f'If extracted files are not integrated, please delete [{extract_root}] manually, '
                  f'then SpikingJelly will re-extract files from [{download_root}].')
            # shutil.rmtree(extract_root)
            # print(f'Delete [{extract_root}].')
        else:
            os.mkdir(extract_root)
            print(f'Mkdir [{extract_root}].')
            self.extract_downloaded_files(download_root, extract_root)

        # Now let us convert the origin binary files to npz files
        os.mkdir(events_np_root)
        print(f'Mkdir [{events_np_root}].')
        print(f'Start to convert the origin data from [{extract_root}] to [{events_np_root}] in np.ndarray format.')
        self.create_events_np_files(extract_root, events_np_root)

    H, W = self.get_H_W()

    if data_type == 'event':
        _root = events_np_root
        _loader = np.load
        _transform = transform
        _target_transform = target_transform

    elif data_type == 'frame':
        if frames_number is not None:
            assert frames_number > 0 and isinstance(frames_number, int)
            assert split_by == 'time' or split_by == 'number'
            frames_np_root = os.path.join(root, f'frames_number_{frames_number}_split_by_{split_by}')
            if os.path.exists(frames_np_root):
                print(f'The directory [{frames_np_root}] already exists.')
            else:
                os.mkdir(frames_np_root)
                print(f'Mkdir [{frames_np_root}].')

                # create the same directory structure
                create_same_directory_structure(events_np_root, frames_np_root)

                # use multi-thread to accelerate
                t_ckp = time.time()
                with ThreadPoolExecutor(max_workers=configure.max_threads_number_for_datasets_preprocess) as tpe:
                    print(f'Start ThreadPoolExecutor with max workers = [{tpe._max_workers}].')
                    for e_root, e_dirs, e_files in os.walk(events_np_root):
                        if e_files.__len__() > 0:
                            output_dir = os.path.join(frames_np_root, os.path.relpath(e_root, events_np_root))
                            for e_file in e_files:
                                events_np_file = os.path.join(e_root, e_file)
                                print(f'Start to integrate [{events_np_file}] to frames and save to [{output_dir}].')
                                tpe.submit(integrate_events_file_to_frames_file_by_fixed_frames_number,
                                           self.load_events_np, events_np_file, output_dir, split_by,
                                           frames_number, H, W, True)
                print(f'Used time = [{round(time.time() - t_ckp, 2)}s].')

            _root = frames_np_root
            _loader = load_npz_frames
            _transform = transform
            _target_transform = target_transform

        elif duration is not None:
            assert duration > 0 and isinstance(duration, int)
            frames_np_root = os.path.join(root, f'duration_{duration}')
            if os.path.exists(frames_np_root):
                print(f'The directory [{frames_np_root}] already exists.')
            else:
                os.mkdir(frames_np_root)
                print(f'Mkdir [{frames_np_root}].')

                # create the same directory structure
                create_same_directory_structure(events_np_root, frames_np_root)

                # use multi-thread to accelerate
                t_ckp = time.time()
                with ThreadPoolExecutor(max_workers=configure.max_threads_number_for_datasets_preprocess) as tpe:
                    print(f'Start ThreadPoolExecutor with max workers = [{tpe._max_workers}].')
                    for e_root, e_dirs, e_files in os.walk(events_np_root):
                        if e_files.__len__() > 0:
                            output_dir = os.path.join(frames_np_root, os.path.relpath(e_root, events_np_root))
                            for e_file in e_files:
                                events_np_file = os.path.join(e_root, e_file)
                                print(f'Start to integrate [{events_np_file}] to frames and save to [{output_dir}].')
                                tpe.submit(integrate_events_file_to_frames_file_by_fixed_duration,
                                           self.load_events_np, events_np_file, output_dir, duration, H, W, True)
                print(f'Used time = [{round(time.time() - t_ckp, 2)}s].')

            _root = frames_np_root
            _loader = load_npz_frames
            _transform = transform
            _target_transform = target_transform

        elif custom_integrate_function is not None:
            if custom_integrated_frames_dir_name is None:
                custom_integrated_frames_dir_name = custom_integrate_function.__name__

            frames_np_root = os.path.join(root, custom_integrated_frames_dir_name)
            if os.path.exists(frames_np_root):
                print(f'The directory [{frames_np_root}] already exists.')
            else:
                os.mkdir(frames_np_root)
                print(f'Mkdir [{frames_np_root}].')

                # create the same directory structure
                create_same_directory_structure(events_np_root, frames_np_root)

                # use multi-thread to accelerate
                t_ckp = time.time()
                with ThreadPoolExecutor(max_workers=configure.max_threads_number_for_datasets_preprocess) as tpe:
                    print(f'Start ThreadPoolExecutor with max workers = [{tpe._max_workers}].')
                    for e_root, e_dirs, e_files in os.walk(events_np_root):
                        if e_files.__len__() > 0:
                            output_dir = os.path.join(frames_np_root, os.path.relpath(e_root, events_np_root))
                            for e_file in e_files:
                                events_np_file = os.path.join(e_root, e_file)
                                print(f'Start to integrate [{events_np_file}] to frames and save to [{output_dir}].')
                                tpe.submit(save_frames_to_npz_and_print,
                                           os.path.join(output_dir, os.path.basename(events_np_file)),
                                           custom_integrate_function(np.load(events_np_file), H, W))
                print(f'Used time = [{round(time.time() - t_ckp, 2)}s].')

            _root = frames_np_root
            _loader = load_npz_frames
            _transform = transform
            _target_transform = target_transform

        else:
            raise ValueError('At least one of "frames_number", "duration" and "custom_integrate_function" should not be None.')

    if train is not None:
        if train:
            _root = os.path.join(_root, 'train')
        else:
            _root = os.path.join(_root, 'test')

    super().__init__(root=_root, loader=_loader, extensions=('.npz', ),
                     transform=_transform, target_transform=_target_transform)
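# --- Hedged usage sketch (illustration only, not part of the original class) ---
# The docstring example above already covers custom_integrate_function; this sketch
# only adds the optional directory-name argument handled by the constructor.
# DVS128Gesture, root_dir, and integrate_events_to_2_frames_randomly are as in the
# docstring example; the directory name is a placeholder.
#
# train_set = DVS128Gesture(root_dir, train=True, data_type='frame',
#                           custom_integrate_function=integrate_events_to_2_frames_randomly,
#                           custom_integrated_frames_dir_name='two_random_frames')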
def download_url(url, root, filename=None, md5=None, timeout=4, retries=4,
                 verbose=False, silent=False):
    """Download a file accessible via URL with multiple retries

    Args:
        url (str or tuple<str, str>): URL to request
        root (pathlib.Path): folder where the file will be saved
        filename (str, optional): name of the output file
        md5 (str, optional): md5 for integrity verification
        timeout (float, optional): number of seconds before the request times out
        retries (int, optional): number of additional allowed download attempts
        verbose (bool, optional): whether status can be displayed in console
        silent (bool, optional): whether Exception should be raised upon download failure
    """

    if isinstance(url, tuple):
        url, filename = url

    if not isinstance(url, str):
        raise TypeError('expected argument url to be of type <str>')

    # Root folder
    root = Path(root).expanduser()
    root.mkdir(parents=True, exist_ok=True)
    if not filename:
        filename = get_fname(url)

    fpath = root.joinpath(filename)

    # Download file
    if check_integrity(fpath, md5):
        if verbose:
            print(f'Using downloaded and verified file: {fpath}')
    else:
        success = False
        # Allow multiple retries
        for idx in range(retries + 1):
            try:
                url_retrieve(url, fpath, timeout)
                success = True
            except Exception as e:
                # Try switching to http
                if url.startswith('https'):
                    try:
                        url_retrieve(url.replace('https:', 'http:'), fpath, timeout)
                        success = True
                    except Exception:
                        success = False
                # Handle exception
                if not success and (idx == retries):
                    if not silent:
                        raise e
                    elif verbose:
                        print(e)
            if success:
                break
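# --- Hedged usage sketch for download_url above (added for illustration) ---
# Assumes the helpers referenced above (check_integrity, get_fname, url_retrieve)
# are available in the same module; the URL below is a placeholder, so a real run
# is expected to fail and land in the except branch.
if __name__ == '__main__':
    from pathlib import Path

    try:
        download_url('https://example.com/archive.zip',
                     root=Path('~/datasets'),
                     md5=None,      # skip checksum verification in this sketch
                     retries=2,
                     verbose=True)
    except Exception as err:
        # network failures are expected with a placeholder URL
        print(f'download failed: {err}')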
def _check_integrity(self):
    root = os.path.join(self.root, self.dataset_name, self.processed_folder)
    md5 = self.split_list[self.split][2]
    fpath = os.path.join(root, self.filename)
    return check_integrity(fpath, md5)
def _check_integrity(self):
    path_to_zip = os.path.join(self.root, self.filename)
    return check_integrity(path_to_zip, self.checksum)
def _check_integrity(self):
    return check_integrity(fpath=str(self.download_root / self.filename),
                           md5=self.file_md5)
def _check_integrity(self):
    for f, md5 in self.checklist:
        fpath = os.path.join(self.root, self.base_folder, f)
        if not check_integrity(fpath, md5):
            return False
    return True
def parse_archives(self):
    if not check_integrity(os.path.join(self.root, META_FILE)):
        print("creating meta file...")
        parse_devkit_archive(self.root)
    print("parsing val archive...")
    parse_val_archive(self.root, disable=self.disable_parse_val)
def _verify_archive(root, file, md5):
    if not check_integrity(os.path.join(root, file), md5):
        msg = ("The archive {} is not present in the root directory or is corrupted. "
               "You need to download it externally and place it in {}.")
        raise RuntimeError(msg.format(file, root))
def _check_integrity(self):
    for zip_path, zip_md5 in zip(self._zip_paths, self._zip_md5s):
        if not check_integrity(zip_path, zip_md5):
            return False
    return True
def download(root):
    file_name = os.path.basename(URL)
    # check that the zipped file exists
    if not check_integrity(os.path.join(root, file_name)):
        # if it does not, download and extract it under root
        download_and_extract_archive(URL, root, md5=MD5)
def _check_integrity(self):
    fpath = os.path.join(self.root, self.filename)
    if not check_integrity(fpath, self.tgz_md5):
        return False
    return True
def _check_integrity(self):
    data_path = os.path.join(self.root, self.images_filename)
    labels_path = os.path.join(self.root, self.labels_filename)
    return (check_integrity(data_path, self.images_md5)
            and check_integrity(labels_path, self.labels_md5))
def _check_integrity(self):
    return (check_integrity(os.path.join(self.root, self.test_file),
                            self.test_file_md5)
            and (not self.train
                 or check_integrity(os.path.join(self.root, self.train_file),
                                    self.train_file_md5)))
def _check_integrity(self):
    root = self.root
    fpath = os.path.join(root, self.filename)
    if not check_integrity(fpath, self.md5_checksum):
        return False
    return True
def _check_integrity(self):
    if not check_integrity(join(self.root, self.filename + '.zip'),
                           self.zips_md5[self.filename]):
        return False
    return True
def __init__(self, root, fold="train", transform=None, target_transform=None, download=False): fold = fold.lower() self.train = False self.test = False self.val = False if fold == "train": self.train = True elif fold == "test": self.test = True elif fold == "val": self.val = True else: raise RuntimeError("Not train-val-test") self.root = os.path.expanduser(root) self.transform = transform self.target_transform = target_transform fpath = os.path.join(root, self.filename) print('fpath =', fpath) print('tgz_md5 =', self.tgz_md5) if not check_integrity(fpath, self.tgz_md5): raise RuntimeError('You have not placed the' + ' cifar-100-cs543-python.tar.gz file in the' + ' root folder you specified. Please check' + ' and retry.') if not self._check_integrity(): raise RuntimeError('Dataset not found or corrupted.' + ' Download it and extract the file again.') # now load the picked numpy arrays if self.train or self.val: self.train_data = [] self.train_labels = [] for fentry in self.train_list: f = fentry[0] file = os.path.join(self.root, self.base_folder, f) fo = open(file, 'rb') if sys.version_info[0] == 2: entry = pickle.load(fo) else: entry = pickle.load(fo, encoding='latin1') self.train_data.append(entry['data']) if 'labels' in entry: self.train_labels += entry['labels'] else: self.train_labels += entry['fine_labels'] fo.close() self.train_data = np.concatenate(self.train_data) self.train_data = self.train_data.reshape((50000, 3, 32, 32)) self.train_data = self.train_data.transpose( (0, 2, 3, 1)) # convert to HWC p = np.arange(0, 50000, 10) mask_train = np.ones((50000, ), dtype=bool) mask_train[p] = False mask_val = np.zeros((50000, ), dtype=bool) mask_val[p] = True copy_all_data = np.array(self.train_data) self.val_data = np.array(copy_all_data[mask_val]) self.train_data = np.array(copy_all_data[mask_train]) copy_all_labels = np.array(self.train_labels) self.val_labels = np.array(copy_all_labels[mask_val]) self.train_labels = np.array(copy_all_labels[mask_train]) elif self.test: f = self.test_list[0][0] file = os.path.join(self.root, self.base_folder, f) fo = open(file, 'rb') if sys.version_info[0] == 2: entry = pickle.load(fo) else: entry = pickle.load(fo, encoding='latin1') self.test_data = entry['data'] if 'labels' in entry: self.test_labels = entry['labels'] else: self.test_labels = entry['fine_labels'] fo.close() self.test_data = self.test_data.reshape((10000, 3, 32, 32)) self.test_data = self.test_data.transpose( (0, 2, 3, 1)) # convert to HWC
def _check_integrity(self):
    for tar_path, tar_md5 in zip(self._tar_paths, self._tar_md5s):
        if not check_integrity(tar_path, tar_md5):
            return False
    return True