def _guess_archive_loader(
    self, path: pathlib.Path
) -> Optional[Callable[[IterDataPipe[Tuple[str, IO]]], IterDataPipe[Tuple[str, IO]]]]:
    """Pick an archive loader for ``path`` based on its file name.

    The file type is detected from ``path.name`` only. The archive-type
    element of the detection result is then used as the key into
    ``self._ARCHIVE_LOADERS``.

    Returns:
        The loader registered for the detected archive type, or ``None`` when
        detection raises ``RuntimeError`` or no loader is registered for the
        detected archive type.
    """
    try:
        _suffix, archive_type, _compression = _detect_file_type(path.name)
    except RuntimeError:
        # The file type could not be determined, so there is nothing to load.
        return None
    else:
        return self._ARCHIVE_LOADERS.get(archive_type)  # type: ignore[arg-type]
def test_detect_file_type(self):
    # Table of (file name, expected detection result). Each expected value is
    # a 3-tuple whose first element is the full matched suffix and whose
    # second element is the archive type (``None`` for non-archives).
    cases = [
        ("foo.tar.xz", (".tar.xz", ".tar", ".xz")),
        ("foo.tar", (".tar", ".tar", None)),
        ("foo.tar.gz", (".tar.gz", ".tar", ".gz")),
        ("foo.tgz", (".tgz", ".tar", ".gz")),
        ("foo.gz", (".gz", None, ".gz")),
        ("foo.zip", (".zip", ".zip", None)),
        ("foo.xz", (".xz", None, ".xz")),
    ]
    for name, wanted in cases:
        # One sub-test per file name so a single failure does not hide the rest.
        with self.subTest(file=name):
            detected = utils._detect_file_type(name)
            self.assertSequenceEqual(detected, wanted)
def download_from_url(
    *,
    file_info: Union[UrlFileInfo, List[UrlFileInfo]],
    root: Union[Path, str],
    logger: Optional[logging.Logger] = None,
    remove_finished: bool = True,
) -> None:
    """Download one or more archives into ``root`` and extract them.

    For every entry the archive is expected at ``root / info.name`` and is
    extracted to the same path with the detected archive suffix removed. An
    entry is skipped entirely when that extraction path already exists, and
    the download step is skipped when the archive file is already present.

    Args:
        file_info: A single file description or a list of them. Each entry
            must provide ``name``, ``url`` and ``md5`` attributes.
        root: Directory that receives the downloads. A non-``Path`` value is
            converted to ``Path`` and has ``~`` expanded. The directory is
            created (including parents) if missing.
        logger: Logger used for progress messages; defaults to this module's
            logger when ``None``.
        remove_finished: Forwarded to ``extract_archive``.

    Notes:
        If ``extract_archive`` raises ``BadZipFile``, extraction is retried
        with the ``jar`` command-line tool, run with ``root`` as its working
        directory. A failure of that fallback (non-zero exit status, or the
        ``jar`` executable not being found) is logged and not re-raised.
    """
    logger = logging.getLogger(__name__) if logger is None else logger
    file_info_ls = file_info if isinstance(file_info, list) else [file_info]
    if not isinstance(root, Path):
        root = Path(root).expanduser()
    # Create the specified root directory if it doesn't already exist
    root.mkdir(parents=True, exist_ok=True)

    for info in file_info_ls:
        filepath = root / info.name
        filepath_str = str(filepath)
        suffix = _detect_file_type(filepath_str)[0]
        # Strip only the trailing occurrence of the suffix. Splitting on the
        # first occurrence would truncate the path whenever the same text also
        # appears earlier (e.g. a parent directory named "data.zip.cache").
        extracted_filepath = Path(filepath_str.rsplit(suffix, 1)[0])

        if extracted_filepath.exists():
            logger.info(f"File '{info.name}' already downloaded and extracted.")
            continue

        if filepath.exists():
            logger.info(f"File '{info.name}' already downloaded.")
        else:
            logger.info(f"Downloading file '{info.name}' from address '{info.url}'.")
            download_url(url=info.url, filename=info.name, root=str(root), md5=info.md5)

        logger.info(f"Extracting '{filepath.resolve()}' to '{root.resolve()}'")
        try:
            extract_archive(
                from_path=str(filepath),
                to_path=str(extracted_filepath),
                remove_finished=remove_finished,
            )
        # Fall back on using jar to unzip the archive
        except BadZipFile:
            try:
                subprocess.run(["jar", "-xvf", str(filepath)], check=True, cwd=root)
            # FileNotFoundError is raised when the ``jar`` executable itself
            # cannot be found, which is exactly the "Java not on the system
            # path" situation the message below describes.
            except (subprocess.CalledProcessError, FileNotFoundError):
                logger.info(
                    "Attempted to fall back on using Java to extract malformed .zip file; "
                    "however, there was a problem. Try redownloading the zip file or "
                    "checking that Java has been properly added to your system variables."
                )
def test_detect_file_type_unknown_partial_ext(self):
    # A single extension that the detector is expected to reject.
    unknown_name = "foo.bar"
    with self.assertRaises(RuntimeError):
        utils._detect_file_type(unknown_name)
def test_detect_file_type_unknown_compression(self):
    # ".tar" followed by a trailing extension the detector is expected to reject.
    unknown_name = "foo.tar.baz"
    with self.assertRaises(RuntimeError):
        utils._detect_file_type(unknown_name)
def test_detect_file_type_unknown_archive_type(self):
    # ".gz" preceded by an extension that is expected to make detection fail.
    unknown_name = "foo.bar.gz"
    with self.assertRaises(RuntimeError):
        utils._detect_file_type(unknown_name)
def test_detect_file_type_to_many_exts(self):
    # Three stacked extensions: detection is expected to fail for too many.
    overlong_name = "foo.bar.tar.gz"
    with self.assertRaises(RuntimeError):
        utils._detect_file_type(overlong_name)
def test_detect_file_type_no_ext(self):
    # A bare name without any extension is expected to be rejected.
    bare_name = "foo"
    with self.assertRaises(RuntimeError):
        utils._detect_file_type(bare_name)
def test_detect_file_type_incompatible(self, file):
    """Check that detection raises ``RuntimeError`` for an unsupported name.

    ``file`` is supplied by the test's parametrization. Per the original
    note, the inputs cover names with no extension, an unknown compression
    and an unknown partial extension.
    """
    with pytest.raises(RuntimeError):
        _ = utils._detect_file_type(file)
def test_detect_file_type(self, file, expected):
    """Check that detecting the type of ``file`` yields exactly ``expected``."""
    detected = utils._detect_file_type(file)
    assert detected == expected