def transcode_file(
    source_file: PathOrFile,
    dest_file: PathOrFile,
    source_compression: CompressionArg = True,
    dest_compression: CompressionArg = True,
    use_system: bool = True,
    source_open_args: Optional[dict] = None,
    dest_open_args: Optional[dict] = None,
) -> None:
    """Stream the contents of one file into another, re-encoding on the way.

    The source is read chunk by chunk and each chunk is written to the
    destination, so the data is never held in memory as a whole.

    Args:
        source_file: Path or file-like object to read from. A file object
            must have been opened in mode 'rb'.
        dest_file: Path or file-like object to write to. A file object must
            have been opened in binary mode.
        source_compression: Compression format of the source. If True, the
            format is guessed from the file name; otherwise the name of any
            supported compression format.
        dest_compression: Compression format of the destination. If True,
            the format is guessed from the file name; otherwise the name of
            any supported compression format.
        use_system: Whether to use system-level compression.
        source_open_args: Extra keyword arguments forwarded to ``open_`` for
            the source. "mode" defaults to "rb" when not supplied.
        dest_open_args: Extra keyword arguments forwarded to ``open_`` for
            the destination. "mode" defaults to "wb" when not supplied.
    """
    # Work on shallow copies so the caller's dictionaries are never mutated.
    reader_kwargs = {} if not source_open_args else copy.copy(source_open_args)
    reader_kwargs.setdefault("mode", "rb")

    writer_kwargs = {} if not dest_open_args else copy.copy(dest_open_args)
    writer_kwargs.setdefault("mode", "wb")

    with open_(
        source_file,
        compression=source_compression,
        use_system=use_system,
        **reader_kwargs
    ) as reader:
        # The destination is only opened once the source is open.
        with open_(
            dest_file,
            compression=dest_compression,
            use_system=use_system,
            **writer_kwargs
        ) as writer:
            for block in iter_file_chunked(reader):
                writer.write(block)
def read_bytes(
    path_or_file: PathOrFile, chunksize: int = 1024, **kwargs
) -> Generator[bytes, None, None]:
    """Yield the binary content of a file piece by piece.

    Any "mode" supplied by the caller is replaced with 'rb'.

    Args:
        path_or_file: Path to the file, or a file-like object.
        chunksize: Number of bytes to read at a time.
        kwargs: Additional arguments to pass to ``open_``.

    Yields:
        Chunks of the input file as bytes. Each chunk except the last
        should be of size `chunksize`.
    """
    # The forced mode is placed last so it wins over a caller-supplied one.
    open_kwargs = {**kwargs, "mode": "rb"}
    with open_(path_or_file, **open_kwargs) as handle:
        # Nothing is yielded when open_ hands back None.
        if handle is not None:
            yield from iter_file_chunked(handle, chunksize)
def decompress_file(
    self,
    source: PathOrFile,
    dest: PathOrFile = None,
    keep: bool = True,
    use_system: bool = True,
    **kwargs
) -> str:
    """Read compressed data from ``source`` and write it decompressed to ``dest``.

    Args:
        source: Source file, either a path or an open file-like object.
        dest: Destination file, either a path or an open file-like object.
            If None, the destination path is derived from the components
            of the source path.
        keep: Whether to keep the source file. When False, the source is
            removed (and closed first, if it is a file-like object).
        use_system: Whether to try to use system-level decompression.
        kwargs: Additional arguments to pass to the open method when
            opening the compressed file.

    Returns:
        Path to the destination file.

    Raises:
        Exception: If ``dest`` is None and no destination path can be
            derived from the source path.
    """
    from_path = isinstance(source, str)
    if not from_path:
        in_path = cast(FileLike, source).name
    else:
        in_path = str(source)
        check_readable_file(in_path)

    parts = split_path(in_path)
    if dest is not None:
        to_path = isinstance(dest, str)
    elif len(parts) <= 2:
        raise Exception("Cannot determine path for decompressed file")
    else:
        # Join the first two components as a path and append all remaining
        # components except the last one.
        dest = os.path.join(*parts[0:2]) + ''.join(parts[2:-1])
        to_path = True

    if not to_path:
        out = cast(FileLike, dest)
        dest_name = out.name  # pragma: no-cover
        # A destination without a usable file descriptor rules out the
        # system-level branch.
        try:
            out.fileno()
        except OSError:
            use_system = False
    else:
        dest_name = str(check_writable_file(str(dest)))
        out = open(dest_name, 'wb')

    try:
        if not (use_system and self.can_use_system_decompression):
            reader = self.open_file_python(source, 'rb', **kwargs)
            try:
                # Decompress chunk by chunk, as the source file might be
                # quite large.
                for block in iter_file_chunked(reader):
                    out.write(block)
            finally:
                # Only close the reader when the source was given as a path.
                if from_path:
                    reader.close()
        else:
            if from_path:
                command_source, process_stdin = str(source), None
            else:
                # The command reads from STDIN; the file object is passed
                # as the process's stdin.
                command_source, process_stdin = STDIN, cast(FileLike, source)
            command = self.get_command('d', src=command_source)
            process = PROCESS_PROGRESS.wrap(
                command, stdin=process_stdin, stdout=out)
            process.communicate()

        if not keep:
            if not from_path:
                cast(FileLike, source).close()
            os.remove(in_path)
    finally:
        # A caller-supplied destination object is left open for the caller.
        if to_path:
            out.close()

    return dest_name
def compress_file(
    self,
    source: PathOrFile,
    dest: PathOrFile = None,
    keep: bool = True,
    compresslevel: int = None,
    use_system: bool = True,
    **kwargs
) -> str:
    """Compress data from one file and write it to another.

    Args:
        source: Source file, either a path or an open file-like object.
        dest: Destination file, either a path or an open file-like object.
            If None, the destination is the source path with
            ``self.default_ext`` appended.
        keep: Whether to keep the source file. When False, the source is
            removed (and closed first, if it is a file-like object).
        compresslevel: Compression level.
        use_system: Whether to try to use system-level compression.
        kwargs: Additional arguments to pass to the open method when
            opening the destination file.

    Returns:
        Path to the destination file. For a file-like destination this is
        its ``name`` attribute, falling back to ``str(dest)`` when the
        object has no ``name``.
    """
    source_is_path = isinstance(source, str)
    if source_is_path:
        source_path = str(source)
        check_readable_file(source_path)
    else:
        source_io = cast(IO, source)
        source_path = source_io.name  # pragma: no-cover
        # A source without a usable file descriptor rules out the
        # system-level branch.
        try:
            source_io.fileno()
        except OSError:
            use_system = False

    if dest is None:
        dest = "{}.{}".format(source_path, self.default_ext)
        dest_is_path = True
    else:
        dest_is_path = isinstance(dest, str)

    # Resolve the returned name once so both branches agree. Previously the
    # Python branch returned str(dest), i.e. the repr of a file object.
    if dest_is_path:
        check_writable_file(str(dest))
        dest_name = str(dest)
    else:
        dest_name = getattr(dest, "name", str(dest))

    # Bound before the try so the finally clause cannot raise
    # UnboundLocalError and mask an error from opening the destination.
    dest_file = None
    try:
        if use_system and self.can_use_system_compression:
            if source_is_path:
                cmd_src = str(source)
                prc_src = None
            else:
                # The command reads from STDIN; the file object is passed
                # as the process's stdin.
                cmd_src = STDIN
                prc_src = cast(FileLike, source)
            if dest_is_path:
                dest_file = open(dest_name, 'wb')
            else:
                dest_file = cast(FileLike, dest)
            cmd = self.get_command(
                'c', src=cmd_src, compresslevel=compresslevel)
            proc = PROCESS_PROGRESS.wrap(
                cmd, stdin=prc_src, stdout=dest_file)
            proc.communicate()
        else:
            if source_is_path:
                source_file = open(source_path, 'rb')
            else:
                source_file = cast(FileLike, source)
            try:
                # Open the destination inside the try so the source handle
                # is released even if this call fails.
                dest_file = self.open_file_python(dest, 'wb', **kwargs)
                # Compress chunk by chunk, as the source file might be
                # quite large.
                for chunk in iter_file_chunked(source_file):
                    dest_file.write(chunk)
            finally:
                # Only close the source when it was opened here.
                if source_is_path:
                    source_file.close()

        if not keep:
            if not source_is_path:
                cast(FileLike, source).close()
            os.remove(source_path)
    finally:
        # NOTE(review): when dest is a file-like object, the object returned
        # by open_file_python is not closed here - confirm whether it needs
        # to be finalized for the compressed stream to be complete.
        if dest_is_path and dest_file is not None:
            dest_file.close()

    return dest_name