def _loader(self, path: pathlib.Path) -> IterDataPipe[Tuple[str, IO]]:
    """Build a datapipe that opens the file(s) at ``path`` in binary mode.

    Args:
        path: Either a directory or a single file.

    Returns:
        For a directory: a ``FileOpener`` over a recursive ``FileLister`` of
        that directory. For anything else: a ``FileOpener`` over just that
        path, passed through the loader returned by
        ``self._guess_archive_loader(path)`` when that loader is truthy.
    """
    if not path.is_dir():
        # Single file: open it first, then let an archive loader (if one is
        # found for this path) wrap the opened stream.
        opened = FileOpener(IterableWrapper((str(path),)), mode="rb")
        wrap_archive = self._guess_archive_loader(path)
        return wrap_archive(opened) if wrap_archive else opened

    # Directory: every file below it is opened; no archive handling applies.
    listing = FileLister(str(path), recursive=True)
    return FileOpener(listing, mode="rb")
def molecule_datapipe() -> IterDataPipe:
    """Assemble the datapipe for the MoleculeNet HIV dataset.

    ``HIV.csv`` is fetched with ``download_url`` into the ``../data``
    directory relative to the directory containing this file. The file is
    then opened, parsed as CSV dictionaries, passed through ``parse_smiles``
    with ``HIV_active`` as the target key, and cached in memory.

    Returns:
        The resulting in-memory-cached datapipe.
    """
    # Download HIV dataset from MoleculeNet:
    base_url = 'https://deepchemdata.s3-us-west-1.amazonaws.com/datasets'
    this_dir = osp.dirname(osp.realpath(__file__))
    data_dir = osp.join(this_dir, '..', 'data')
    csv_path = download_url(f'{base_url}/HIV.csv', data_dir)

    return (
        FileOpener([csv_path])
        .parse_csv_as_dict()
        .parse_smiles(target_key='HIV_active')
        .in_memory_cache()  # Cache graph instances in-memory.
    )
def from_data_folder(
    root: Union[str, pathlib.Path],
    *,
    decoder: Optional[Callable[[io.IOBase], torch.Tensor]] = None,
    valid_extensions: Optional[Collection[str]] = None,
    recursive: bool = True,
) -> Tuple[IterDataPipe, List[str]]:
    """Create a datapipe over the files of a folder-per-category dataset.

    Args:
        root: Dataset root; ``~`` is expanded and the path is resolved.
        decoder: Forwarded unchanged to ``_collate_and_decode_data``.
        valid_extensions: If given, each entry ``ext`` becomes the file mask
            ``*.ext``; if ``None``, an empty mask string is passed to
            ``FileLister``.
        recursive: Forwarded to ``FileLister``.

    Returns:
        A ``(datapipe, categories)`` tuple, where ``categories`` is the
        sorted list of names of the directories directly inside ``root``.
    """
    root = pathlib.Path(root).expanduser().resolve()

    # Category names are the immediate sub-directories of the root.
    categories = [entry.name for entry in os.scandir(root) if entry.is_dir()]
    categories.sort()

    masks: Union[List[str], str]
    if valid_extensions is None:
        masks = ""
    else:
        masks = [f"*.{ext}" for ext in valid_extensions]

    files: IterDataPipe = FileLister(str(root), recursive=recursive, masks=masks)
    # NOTE(review): judging by its name, the predicate drops files that sit
    # directly in ``root`` -- confirm against _is_not_top_level_file.
    keep = functools.partial(_is_not_top_level_file, root=root)
    files = Filter(files, keep)
    files = hint_sharding(files)
    files = Shuffler(files, buffer_size=INFINITE_BUFFER_SIZE)
    opened = FileOpener(files, mode="rb")

    to_sample = functools.partial(
        _collate_and_decode_data,
        root=root,
        categories=categories,
        decoder=decoder,
    )
    return Mapper(opened, to_sample), categories
def from_data_folder(
    root: Union[str, pathlib.Path],
    *,
    valid_extensions: Optional[Collection[str]] = None,
    recursive: bool = True,
) -> Tuple[IterDataPipe, List[str]]:
    """Create a datapipe over the files of a folder-per-category dataset.

    Args:
        root: Dataset root; ``~`` is expanded and the path is resolved.
        valid_extensions: If given, each entry ``ext`` becomes the file mask
            ``*.ext``; if ``None``, an empty mask string is passed to
            ``FileLister``.
        recursive: Forwarded to ``FileLister``.

    Returns:
        A ``(datapipe, categories)`` tuple, where ``categories`` is the
        sorted list of names of the directories directly inside ``root``.
    """
    root = pathlib.Path(root).expanduser().resolve()

    # Category names are the immediate sub-directories of the root.
    categories = [entry.name for entry in os.scandir(root) if entry.is_dir()]
    categories.sort()

    masks: Union[List[str], str]
    if valid_extensions is None:
        masks = ""
    else:
        masks = [f"*.{ext}" for ext in valid_extensions]

    files: IterDataPipe = FileLister(str(root), recursive=recursive, masks=masks)
    # NOTE(review): judging by its name, the predicate drops files that sit
    # directly in ``root`` -- confirm against _is_not_top_level_file.
    keep = functools.partial(_is_not_top_level_file, root=root)
    files = Filter(files, keep)
    # Sharding hint is applied before the shuffling hint, as in the original.
    files = hint_sharding(files)
    files = hint_shuffling(files)
    opened = FileOpener(files, mode="rb")

    to_sample = functools.partial(_prepare_sample, root=root, categories=categories)
    return Mapper(opened, to_sample), categories