Example #1
0
def transcode_file(
    source_file: PathOrFile,
    dest_file: PathOrFile,
    source_compression: CompressionArg = True,
    dest_compression: CompressionArg = True,
    use_system: bool = True,
    source_open_args: Optional[dict] = None,
    dest_open_args: Optional[dict] = None,
) -> None:
    """Copy the contents of one file into another, chunk by chunk, opening
    each side with its own compression setting.

    Args:
        source_file: The path or file-like object to read from. If a file, it
            must be opened in mode 'rb'.
        dest_file: The path or file-like object to write to. If a file, it
            must be opened in binary mode.
        source_compression: The compression type of the source file. If True,
            guess compression format from the file name, otherwise the name of
            any supported compression format.
        dest_compression: The compression type of the dest file. If True,
            guess compression format from the file name, otherwise the name of
            any supported compression format.
        use_system: Whether to use system-level compression. The same value
            is used for both the source and the destination.
        source_open_args: Additional keyword arguments used when opening the
            source file. 'mode' defaults to 'rb' when not given.
        dest_open_args: Additional keyword arguments used when opening the
            destination file. 'mode' defaults to 'wb' when not given.
    """
    # Work on shallow copies so the caller's dicts are never modified.
    reader_kwargs = copy.copy(source_open_args) if source_open_args else {}
    reader_kwargs.setdefault("mode", "rb")
    writer_kwargs = copy.copy(dest_open_args) if dest_open_args else {}
    writer_kwargs.setdefault("mode", "wb")

    # The destination is only opened once the source has been opened
    # successfully.
    with open_(
        source_file,
        compression=source_compression,
        use_system=use_system,
        **reader_kwargs
    ) as reader:
        with open_(
            dest_file,
            compression=dest_compression,
            use_system=use_system,
            **writer_kwargs
        ) as writer:
            for block in iter_file_chunked(reader):
                writer.write(block)
Example #2
0
def read_bytes(path_or_file: PathOrFile,
               chunksize: int = 1024,
               **kwargs) -> Generator[bytes, None, None]:
    """Yield the contents of a file as successive chunks of bytes.

    Any 'mode' supplied by the caller is replaced with 'rb'.

    Args:
        path_or_file: Path to the file, or a file-like object.
        chunksize: Number of bytes to read at a time.
        kwargs: Additional arguments to pass to :method:`xphyle.open_`.

    Yields:
        Chunks of the input file as bytes. Each chunk except the last should
        be of size `chunksize`.
    """
    kwargs.update(mode="rb")
    with open_(path_or_file, **kwargs) as handle:
        # Nothing is yielded when no file object was produced.
        if handle is not None:
            yield from iter_file_chunked(handle, chunksize)
Example #3
0
    def decompress_file(self,
                        source: PathOrFile,
                        dest: PathOrFile = None,
                        keep: bool = True,
                        use_system: bool = True,
                        **kwargs) -> str:
        """Decompress data from one file and write to another.

        Args:
            source: Source file, either a path or an open file-like object.
                A file-like object must have a ``name`` attribute.
            dest: Destination file, either a path or an open file-like object.
                If None, the file name is determined from ``source``.
            keep: Whether to keep the source file. When False, a file-like
                source is closed and the file at its path is removed.
            use_system: Whether to try to use system-level compression. This
                is ignored when a file-like ``source`` or ``dest`` cannot
                provide a file descriptor.
            kwargs: Additional arguments to pass to the open method when
                opening the compressed file

        Returns:
            Path to the destination file

        Raises:
            Exception: If ``dest`` is None and a destination path cannot be
                derived from the source path.
        """
        source_is_path = isinstance(source, str)
        if source_is_path:
            source_path = str(source)
            check_readable_file(source_path)
        else:
            source_io = cast(FileLike, source)
            source_path = source_io.name
            if use_system:
                # The source would be handed to the subprocess as stdin; if
                # it cannot provide a file descriptor, fall back to
                # python-level decompression instead of failing.
                try:
                    source_io.fileno()
                except (AttributeError, OSError, ValueError):
                    use_system = False
        source_parts = split_path(source_path)

        if dest is None:
            # Derive the destination by joining the first two path components
            # and re-appending all remaining components except the last one
            # (presumably the compression extension -- TODO confirm against
            # split_path).
            if len(source_parts) > 2:
                dest = (os.path.join(*source_parts[0:2]) +
                        ''.join(source_parts[2:-1]))
                dest_is_path = True
            else:
                raise Exception("Cannot determine path for decompressed file")
        else:
            dest_is_path = isinstance(dest, str)
        if dest_is_path:
            dest_name = str(check_writable_file(str(dest)))
            dest_file = open(dest_name, 'wb')
        else:
            dest_file = cast(FileLike, dest)
            dest_name = dest_file.name
            # pragma: no-cover
            try:
                dest_file.fileno()
            except OSError:
                # Without a file descriptor the destination cannot be used
                # as the stdout of a subprocess.
                use_system = False

        try:
            if use_system and self.can_use_system_decompression:
                # A path is passed to the command directly; a file-like
                # source is fed to the process through stdin.
                src = str(source) if source_is_path else STDIN
                cmd = self.get_command('d', src=src)
                psrc = None if source_is_path else cast(FileLike, source)
                proc = PROCESS_PROGRESS.wrap(cmd, stdin=psrc, stdout=dest_file)
                proc.communicate()
            else:
                source_file = self.open_file_python(source, 'rb', **kwargs)
                try:
                    # Perform sequential decompression as the source
                    # file might be quite large
                    for chunk in iter_file_chunked(source_file):
                        dest_file.write(chunk)
                finally:
                    # Only close what this method opened from a path.
                    if source_is_path:
                        source_file.close()

            if not keep:
                if not source_is_path:
                    cast(FileLike, source).close()
                os.remove(source_path)
        finally:
            # A caller-supplied destination file object is left open.
            if dest_is_path:
                dest_file.close()

        return dest_name
Example #4
0
    def compress_file(self,
                      source: PathOrFile,
                      dest: PathOrFile = None,
                      keep: bool = True,
                      compresslevel: int = None,
                      use_system: bool = True,
                      **kwargs) -> str:
        """Compress data from one file and write to another.

        Args:
            source: Source file, either a path or an open file-like object.
                A file-like object must have a ``name`` attribute.
            dest: Destination file, either a path or an open file-like object.
                If None, the file name is the source path plus this format's
                default extension.
            keep: Whether to keep the source file. When False, a file-like
                source is closed and the file at its path is removed.
            compresslevel: Compression level. In this method it is only
                used when building the system-level command.
            use_system: Whether to try to use system-level compression. This
                is ignored when a file-like ``source`` or ``dest`` cannot
                provide a file descriptor.
            kwargs: Additional arguments to pass to the open method when opening
                the destination file

        Returns:
            Path to the destination file. For a file-like ``dest`` this is
            its ``name`` attribute when it has one.
        """
        source_is_path = isinstance(source, str)
        if source_is_path:
            source_path = str(source)
            check_readable_file(source_path)
        else:
            source_io = cast(IO, source)
            source_path = source_io.name
            # pragma: no-cover
            try:
                source_io.fileno()
            except OSError:
                # Without a file descriptor the source cannot be used as the
                # stdin of a subprocess.
                use_system = False

        if dest is None:
            dest = "{}.{}".format(source_path, self.default_ext)
            dest_is_path = True
        else:
            dest_is_path = isinstance(dest, str)
        if dest_is_path:
            check_writable_file(str(dest))
        elif use_system:
            # The destination would be handed to the subprocess as stdout; if
            # it cannot provide a file descriptor, fall back to python-level
            # compression instead of failing.
            try:
                cast(FileLike, dest).fileno()
            except (AttributeError, OSError, ValueError):
                use_system = False

        # Bound before the try block so the cleanup in ``finally`` cannot
        # raise UnboundLocalError (and mask the real error) when opening the
        # source or destination fails.
        dest_file = None
        try:
            if use_system and self.can_use_system_compression:
                # A path is passed to the command directly; a file-like
                # source is fed to the process through stdin.
                if source_is_path:
                    cmd_src = str(source)
                    prc_src = None
                else:
                    cmd_src = STDIN
                    prc_src = cast(FileLike, source)
                if dest_is_path:
                    dest_name = str(dest)
                    dest_file = open(dest_name, 'wb')
                else:
                    dest_file = cast(FileLike, dest)
                    dest_name = dest_file.name
                cmd = self.get_command('c',
                                       src=cmd_src,
                                       compresslevel=compresslevel)
                proc = PROCESS_PROGRESS.wrap(cmd,
                                             stdin=prc_src,
                                             stdout=dest_file)
                proc.communicate()
            else:
                if source_is_path:
                    source_file = open(str(source), 'rb')
                else:
                    source_file = cast(FileLike, source)
                if dest_is_path:
                    dest_name = str(dest)
                else:
                    # Report the file object's name rather than its repr;
                    # fall back to str(dest) when it has no name.
                    dest_name = str(getattr(dest, "name", dest))
                dest_file = self.open_file_python(dest, 'wb', **kwargs)
                try:
                    # Perform sequential compression as the source
                    # file might be quite large
                    for chunk in iter_file_chunked(source_file):
                        dest_file.write(chunk)
                finally:
                    # Only close what this method opened from a path.
                    if source_is_path:
                        source_file.close()

            if not keep:
                if not source_is_path:
                    cast(FileLike, source).close()
                os.remove(source_path)
        finally:
            # A caller-supplied destination file object is left open.
            if dest_is_path and dest_file is not None:
                dest_file.close()

        return dest_name