Beispiel #1
0
def decompress_file(compressed_file: PathOrFile,
                    dest_file: PathOrFile = None,
                    compression: CompressionArg = None,
                    keep: bool = True,
                    use_system: bool = True,
                    **kwargs) -> Path:
    """Decompress an existing file, either in-place or to a separate file.

    Args:
        compressed_file: Path or file-like object to decompress.
        dest_file: Path or file-like object for the decompressed file.
            If None, file will be decompressed in-place. If True, file will be
            decompressed to a new file (and the compressed file retained) whose
            name is determined automatically.
        compression: None or True, to guess compression format from the file
            name, or the name of any supported compression format.
        keep: Whether to keep the source file.
        use_system: Whether to try to use system-level compression.
        kwargs: Additional arguments to pass to the open method when
            opening the compressed file.

    Returns:
        The path of the decompressed file.

    Raises:
        ValueError: If ``compression`` is not a format name and
            ``compressed_file`` is a file-like object with no ``name``
            attribute, so there is no file name to guess the format from.
    """
    if not isinstance(compression, str):
        # No explicit format name: derive it from the source file's name.
        if isinstance(compressed_file, PurePath):
            source_path = compressed_file
        else:
            # File-like objects such as io.BytesIO carry no name; fail with a
            # clear message instead of an AttributeError.
            source_name = getattr(compressed_file, "name", None)
            if source_name is None:
                raise ValueError(
                    "Cannot guess compression format: 'compressed_file' has "
                    "no 'name' attribute and 'compression' was not specified")
            source_path = Path(source_name)
        compression = FORMATS.guess_compression_format(source_path)
    fmt = FORMATS.get_compression_format(compression)
    return fmt.decompress_file(compressed_file, dest_file, keep, use_system,
                               **kwargs)
Beispiel #2
0
def get_compressor(
        name_or_path: Union[str, PurePath]) -> Optional[CompressionFormat]:
    """
    Look up the `CompressionFormat` for a path or compression type name.

    The format is first guessed via `FORMATS.guess_compression_format`. If that
    yields None and the argument is a `PurePath` that passes
    `safe_check_readable_file`, the file header is inspected instead.

    Returns:
        The matching `CompressionFormat`, or None if no format was guessed.
    """
    format_name = FORMATS.guess_compression_format(name_or_path)
    if format_name is None and isinstance(name_or_path, PurePath):
        # Only real paths can be opened to look at their header bytes.
        candidate = cast(PurePath, name_or_path)
        if safe_check_readable_file(candidate):
            format_name = FORMATS.guess_format_from_file_header(candidate)
    if not format_name:
        return None
    return FORMATS.get_compression_format(format_name)
Beispiel #3
0
def guess_file_format(path: PurePath) -> str:
    """
    Guess the compression format of a file.

    The path itself is tried first (`FORMATS.guess_compression_format`); only
    when that yields None, and the path passes `safe_check_readable_file`, are
    the file's header bytes consulted.

    Args:
        path: The path to the file.

    Returns:
        The guessed format, or None if one could not be determined.
    """
    by_name = FORMATS.guess_compression_format(path)
    if by_name is not None:
        return by_name
    if not safe_check_readable_file(path):
        # Nothing to read a header from.
        return None
    return FORMATS.guess_format_from_file_header(path)
Beispiel #4
0
def guess_file_format(path: str) -> str:
    """Guess the compression format of a file.

    The path itself is tried first (`FORMATS.guess_compression_format`); only
    when that yields None, and the path passes `safe_check_readable_file`, are
    the file's header bytes consulted.

    Args:
        path: The path to the file.

    Returns:
        The guessed format, or None if one could not be determined.

    Raises:
        ValueError: If ``path`` is the STDOUT or STDERR placeholder.
    """
    if path in (STDOUT, STDERR):
        raise ValueError("Cannot guess format from {}".format(path))
    by_name = FORMATS.guess_compression_format(path)
    if by_name is not None:
        return by_name
    if not safe_check_readable_file(path):
        # Nothing to read a header from.
        return None
    return FORMATS.guess_format_from_file_header(path)
Beispiel #5
0
def compress_file(source_file: PathOrFile,
                  compressed_file: PathOrFile = None,
                  compression: CompressionArg = None,
                  keep: bool = True,
                  compresslevel: int = None,
                  use_system: bool = True,
                  **kwargs) -> Path:
    """Compress an existing file, either in-place or to a separate file.

    Args:
        source_file: Path or file-like object to compress.
        compressed_file: The compressed path or file-like object. If None,
            compression is performed in-place. If True, file name is determined
            from ``source_file`` and the decompressed file is retained.
        compression: The name of any supported compression format. If it is
            not a string (e.g. None or True), the format is guessed from the
            name of ``compressed_file``.
        keep: Whether to keep the source file.
        compresslevel: Compression level.
        use_system: Whether to try to use system-level compression.
        kwargs: Additional arguments to pass to the open method when
            opening the compressed file.

    Returns:
        The path to the compressed file.

    Raises:
        ValueError: If ``compression`` is not a format name and no format can
            be guessed because ``compressed_file`` is missing, is True, or is
            a file-like object with no ``name`` attribute.
    """
    if not isinstance(compression, str):
        if not compressed_file:
            raise ValueError(
                "'compressed_file' or 'compression' must be specified")
        if isinstance(compressed_file, PurePath):
            name = str(compressed_file)
        else:
            # ``True`` and nameless file-like objects (e.g. io.BytesIO) give
            # us nothing to guess from; report that instead of letting an
            # AttributeError escape.
            name = getattr(compressed_file, "name", None)
            if name is None:
                raise ValueError(
                    "Cannot guess compression format: 'compressed_file' has "
                    "no name, so 'compression' must be specified")
        compression = FORMATS.guess_compression_format(name)

    fmt = FORMATS.get_compression_format(compression)
    return fmt.compress_file(source_file, compressed_file, keep, compresslevel,
                             use_system, **kwargs)
Beispiel #6
0
def uncompressed_size(path: PurePath,
                      compression: CompressionArg = None) -> Optional[int]:
    """Get the uncompressed size of a (possibly compressed) file.

    Args:
        path: The path to the file.
        compression: The name of a supported compression format; None or
            True to guess the format from ``path``; or False to treat the
            file as uncompressed without guessing.

    Returns:
        The value returned by the format's ``uncompressed_size`` for ``path``
        when a format is given or guessed (per the annotation this may be
        None); otherwise the on-disk size of the file in bytes.

    Raises:
        ValueError if the compression format is not supported.
    """
    format_name = None
    if compression is not False:
        if isinstance(compression, str):
            format_name = compression
        else:
            format_name = FORMATS.guess_compression_format(path)
    if format_name is None:
        # Either guessing was disabled or nothing was guessed: assume the
        # file is not compressed to begin with.
        return Path(path).stat().st_size
    return FORMATS.get_compression_format(format_name).uncompressed_size(path)
Beispiel #7
0
def xopen(
    target: OpenArg,
    mode: ModeArg = None,
    compression: CompressionArg = None,
    use_system: bool = True,
    allow_subprocesses: bool = True,
    context_wrapper: bool = None,
    file_type: FileType = None,
    validate: bool = True,
    overwrite: bool = True,
    close_fileobj: bool = True,
    **kwargs,
) -> FileLike:
    """
    Replacement for the builtin `open` function that can also open URLs and
    subprocesses, and automatically handles compressed files.

    Args:
        target: A relative or absolute path, a URL, a system command, a
            file-like object, or :class:`bytes` or :class:`str` to
            indicate a writeable byte/string buffer.
        mode: Some combination of the access mode ('r', 'w', 'a', or 'x')
            and the open mode ('b' or 't'). If the latter is not given, 't'
            is used by default.
        compression: If None or True, compression type (if any) will be
            determined automatically. If False, no attempt will be made to
            determine compression type. Otherwise this must specify the
            compression type (e.g. 'gz'). See `xphyle.compression` for
            details. For a readable standard stream the format is guessed by
            peeking at the underlying binary stream.
        use_system: Whether to attempt to use system-level compression
            programs.
        allow_subprocesses: Whether to allow `target` to be a subprocess (e.g.
            '|cat'). There are security risks associated with allowing
            users to run arbitrary system commands.
        context_wrapper: If True, the file is wrapped in a `FileLikeWrapper`
            subclass before returning (`FileWrapper` for files/URLs,
            `StdWrapper` for STDIN/STDOUT/STDERR). If None, the default value
            (set using :meth:`configure`) is used.
        file_type: a FileType; explicitly specify the file type. By default the
            file type is detected, but auto-detection might make mistakes, e.g.
            a local file contains a colon (':') in the name.
        validate: Ensure that the user-specified compression format matches the
            format guessed from the file extension or magic bytes.
        overwrite: For files opened in write mode, whether to overwrite
            existing files (True).
        close_fileobj: When `target` is a file-like object / `file_type` is
            FileType.FILELIKE, and `context_wrapper` is True, whether to close
            the underlying file when closing the wrapper.
        kwargs: Additional keyword arguments to pass to ``open``.

    `target` is interpreted as follows:
        * If starts with '|', it is assumed to be a system command
        * If a file-like object, it is used as-is
        * If one of STDIN, STDOUT, STDERR, the appropriate `sys` stream is used
        * If parseable by `xphyle.urls.parse_url()`, it is assumed to be a URL
        * If file_type == FileType.BUFFER and target is a string or bytes and
          mode is readable, a new StringIO/BytesIO is created with 'target'
          passed to its constructor.
        * Otherwise it is assumed to be a local file

    If `use_system` is True and the file is compressed, the file is opened with
    a pipe to the system-level compression program (e.g. ``gzip`` for '.gz'
    files) if possible, otherwise the corresponding python library is used.

    Returns:
        A Process if `file_type` is PROCESS, or if `file_type` is None and
        `target` starts with '|'. Otherwise, an opened file-like object. If
        `context_wrapper` is True, this will be a subclass of `FileLikeWrapper`.

    Raises:
        ValueError if:
            * ``compression`` is True and compression format cannot be
              determined
            * the specified compression format is invalid
            * ``validate`` is True and the specified compression format is not
              the actual format of the file
            * the target, file type, or mode are invalid
            * ``target`` is a subprocess and ``allow_subprocesses`` is False
            * ``overwrite`` is False and the file to write already exists
    """
    # Normalize a user-supplied format name/alias to its canonical name.
    if compression and isinstance(compression, str):
        cannonical_fmt_name = FORMATS.get_compression_format_name(compression)
        if cannonical_fmt_name is None:
            raise ValueError(
                "Invalid compression format: {}".format(compression))
        else:
            compression = cannonical_fmt_name

    # Convert placeholder strings ("-", "_") to paths
    target = convert_std_placeholder(target, mode)

    # Whether the file object is stdin/stdout/stderr
    is_std = target in (STDIN, STDOUT, STDERR)
    # Whether 'target' is currently a file-like object in binary mode
    is_bin = False
    # Whether target is a string
    is_str = isinstance(target, str)
    # Whether target is a Path
    is_path = not is_std and isinstance(target, PurePath)
    # Whether target is a class indicating a buffer type
    is_buffer = target in (str, bytes)

    if not file_type:
        # Auto-detect the file type. A plain string that is not a command is
        # left undetermined here and resolved to URL/LOCAL below.
        if is_path:
            file_type = FileType.LOCAL
        elif is_std:
            file_type = FileType.STDIO
        elif is_buffer:
            file_type = FileType.BUFFER
        elif not is_str:
            file_type = FileType.FILELIKE
        elif target.startswith("|"):
            file_type = FileType.PROCESS
    elif file_type == FileType.BUFFER and (is_str or is_path
                                           or isinstance(target, bytes)):
        # The target is the initial content of a readable buffer.
        if not mode:
            mode = FileMode(access="r", coding="t" if is_str else "b")
        is_buffer = True
    elif ((is_str or is_path or is_buffer) == (file_type is FileType.FILELIKE)
          or is_std != (file_type is FileType.STDIO) or is_buffer !=
          (file_type is FileType.BUFFER)):
        # The explicitly requested file type contradicts what target is.
        raise ValueError(
            f"file_type = {file_type} does not match target {target}")

    url_parts = None
    if file_type in (FileType.URL, None):
        url_parts = parse_url(target)
        if not file_type:
            file_type = FileType.URL if url_parts else FileType.LOCAL
        elif not url_parts:
            raise ValueError(f"{target} is not a valid URL")

    if not mode:
        # set to default
        if not is_buffer:
            mode = FileMode()
        elif target == str:
            mode = FileMode("wt")
        else:
            mode = FileMode("wb")
    elif isinstance(mode, str):
        if "U" in mode and "newline" in kwargs and kwargs[
                "newline"] is not None:
            raise ValueError(
                "newline={} not compatible with universal newlines ('U') "
                "mode".format(kwargs["newline"]))
        mode = FileMode(mode)

    if context_wrapper is None:
        context_wrapper = DEFAULTS["xopen_context_wrapper"]

    # Return early if opening a process
    if file_type is FileType.PROCESS:
        if not allow_subprocesses:
            raise ValueError("Subprocesses are disallowed")
        if target.startswith("|"):
            target = target[1:]
        popen_args = dict(kwargs)
        for std in ("stdin", "stdout", "stderr"):
            popen_args[std] = PIPE
        if mode.writable:
            if compression is True:
                raise ValueError(
                    "Cannot determine compression automatically when writing "
                    "to process stdin")
            elif compression is None:
                compression = False
            outstream = "stdin"
        else:
            outstream = "stdout"
        # The stream we will read from/write to is described by a dict of
        # open arguments instead of a plain PIPE.
        popen_args[outstream] = dict(
            mode=mode,
            compression=compression,
            validate=validate,
            context_wrapper=context_wrapper,
        )
        return popen(target, **popen_args)

    # The raw buffer object, retained so it can be handed to BufferWrapper
    buffer = None

    if file_type is FileType.BUFFER:
        if target == str:
            target = io.StringIO()
        elif target == bytes:
            target = io.BytesIO()
            is_bin = True
        elif is_str or isinstance(target, bytes):
            if not mode.readable:
                raise ValueError(
                    "'mode' must be readable when 'file_type' == BUFFER "
                    "and 'target' is string or bytes.")
            if is_str:
                if mode.coding != ModeCoding.TEXT:
                    raise ValueError("Must use text mode with a string buffer")
                target = io.StringIO(target)
            else:
                if mode.coding != ModeCoding.BINARY:
                    raise ValueError(
                        "Must use binary mode with a bytes buffer")
                target = io.BytesIO(target)
                is_bin = True
        if context_wrapper:
            buffer = target
        if not mode.readable:
            # A write-only buffer has no content or name to guess from.
            if compression is True:
                raise ValueError(
                    "Cannot guess compression for a write-only buffer")
            elif compression is None:
                compression = False
            validate = False

    # The file handle we will open
    # TODO: figure out the right type
    fileobj: Any = None
    # The name to use for the file
    name = None
    # Guessed compression type, if compression in (None, True)
    guess = None
    # Whether to try and guess file format
    guess_format = compression in (None, True)
    # Whether to validate that the actual compression format matches expected
    validate = validate and bool(compression) and not guess_format

    if file_type is FileType.STDIO:
        use_system = False
        if target == STDERR:
            if not mode.writable:
                raise ValueError("Mode must be writable for stderr")
            stdobj = sys.stderr
        else:
            stdobj = sys.stdin if mode.readable else sys.stdout

        # whether we need the underlying byte stream regardless of the mode
        check_readable = mode.readable and (validate or guess_format)

        if mode.binary or compression or check_readable:
            # get the underlying binary stream
            fileobj = stdobj.buffer
            is_bin = True
        else:
            fileobj = stdobj

        if check_readable:
            # Guessing inspects the stream via peek(); wrap it if needed.
            if not hasattr(fileobj, "peek"):
                fileobj = io.BufferedReader(fileobj)
            guess = FORMATS.guess_format_from_buffer(fileobj)
        else:
            validate = False
    elif file_type in (FileType.FILELIKE, FileType.BUFFER):
        fileobj = target
        use_system = False

        # determine mode of fileobj
        if hasattr(fileobj, "mode"):
            fileobj_mode = FileMode(target.mode)
        elif hasattr(fileobj, "readable"):
            access = ModeAccess.READWRITE
            # if fileobj.readable and fileobj.writable:
            #     access = ModeAccess.READWRITE
            # elif fileobj.writable:
            #     access = ModeAccess.WRITE
            # else:
            #     access = ModeAccess.READ
            fileobj_mode = FileMode(
                access=access,
                coding="t" if hasattr(fileobj, "encoding") else "b")
        else:  # pragma: no-cover
            # TODO I don't think we can actually get here, but leaving for now.
            raise ValueError("Cannot determine file mode")

        # make sure modes are compatible
        if not ((mode.readable and fileobj_mode.readable) or
                (mode.writable and fileobj_mode.writable)):
            raise ValueError(
                "mode {} and file mode {} are not compatible".format(
                    mode, fileobj_mode))

        # compression/decompression only possible for binary files
        is_bin = fileobj_mode.binary
        if not is_bin:
            if compression:
                raise ValueError(
                    "Cannot compress to/decompress from a text-mode "
                    "file/buffer")
            else:
                # noinspection PyUnusedLocal
                guess_format = False
        elif validate or guess_format:
            if mode.readable:
                if not hasattr(fileobj, "peek"):
                    fileobj = io.BufferedReader(fileobj)
                guess = FORMATS.guess_format_from_buffer(fileobj)
            elif hasattr(fileobj, "name") and isinstance(fileobj.name, str):
                guess = FORMATS.guess_compression_format(fileobj.name)
            else:
                raise ValueError(
                    "Could not guess compression format from {}".format(
                        target))
    elif file_type is FileType.URL:
        if not mode.readable:
            raise ValueError("URLs can only be opened in read mode")

        fileobj = open_url(target)
        if not fileobj:
            raise ValueError("Could not open URL {}".format(target))

        use_system = False
        name = get_url_file_name(fileobj, url_parts)

        # Get compression format if not specified
        if validate or guess_format:
            guess = FORMATS.guess_format_from_buffer(fileobj)
            # The following code is never used, unless there is some
            # scenario in which the file type cannot be guessed from
            # the header bytes. I'll leave this here for now but keep
            # it commented out until someone provides an example of
            # why it's necessary.
            # if guess is None and guess_format:
            #     # Check if the MIME type indicates that the file is
            #     # compressed
            #     mime = get_url_mime_type(fileobj)
            #     if mime:
            # TODO: look at this https://github.com/dbtsai/python-mimeparse
            # or similar for mime parsing
            #         guess = get_format_for_mime_type(mime)
            #     # Try to guess from the file name
            #     if not guess and name:
            #         guess = guess_file_format(name)
    elif file_type is FileType.LOCAL:
        if is_str:
            target = Path(target)
        if mode.readable:
            target = check_readable_file(target)
            if validate or guess_format:
                guess = FORMATS.guess_format_from_file_header(target)
        else:
            target = check_writable_file(target)
            # If overwrite=False, check that the file doesn't already exist
            if not overwrite and os.path.exists(target):
                raise ValueError("File already exists: {}".format(target))
            if validate or guess_format:
                guess = FORMATS.guess_compression_format(target)

    if validate and guess != compression:
        # TODO: this is to handle the case where the same extension can be used for
        # multiple compression formats, and we're writing a file so the format cannot
        # be detected from the header. Formats currently does not support an extension
        # being used with multiple formats. Currently bgzip is the only format that has
        # this issue.
        if not mode.readable and FORMATS.has_compatible_extension(
                compression, guess):
            pass
        else:
            raise ValueError(
                "Acutal compression format {} is not compatible with expected "
                "format {}".format(guess, compression))
    elif guess:
        compression = guess
    elif compression is True:
        raise ValueError(f"Could not guess compression format from {target}")

    if compression:
        fmt = FORMATS.get_compression_format(str(compression))
        compression = fmt.name
        fileobj = fmt.open_file(fileobj or target,
                                mode,
                                use_system=use_system,
                                **kwargs)
        # The result is no longer the raw std stream, so it must not be
        # wrapped in a StdWrapper below.
        is_std = False
    elif not fileobj:
        fileobj = open(target, mode.value, **kwargs)
    elif mode.text and is_bin and (is_std or file_type is FileType.FILELIKE):
        # Text mode was requested but we hold a binary stream: decode it.
        fileobj = io.TextIOWrapper(fileobj)
        fileobj.mode = mode.value

    if context_wrapper:
        if is_std:
            fileobj = StdWrapper(fileobj, compression=compression)
        elif file_type == FileType.BUFFER:
            fileobj = BufferWrapper(fileobj,
                                    buffer,
                                    compression=compression,
                                    close_fileobj=close_fileobj)
        else:
            fileobj = FileWrapper(
                fileobj,
                name=name,
                mode=mode,
                compression=compression,
                close_fileobj=close_fileobj,
            )

    return fileobj
Beispiel #8
0
def xopen(
        path,  #: OpenArg,
        mode: ModeArg = None,
        compression: CompressionArg = None,
        use_system: bool = True,
        context_wrapper: bool = None,
        file_type: FileType = None,
        validate: bool = True,
        **kwargs) -> FileLike:
    """
    Replacement for the builtin `open` function that can also open URLs and
    subprocessess, and automatically handles compressed files.
    
    Args:
        path: A relative or absolute path, a URL, a system command, a
            file-like object, or :class:`bytes` or :class:`str` to
            indicate a writeable byte/string buffer.
        mode: Some combination of the access mode ('r', 'w', 'a', or 'x')
            and the open mode ('b' or 't'). If the later is not given, 't'
            is used by default.
        compression: If None or True, compression type (if any) will be
            determined automatically. If False, no attempt will be made to
            determine compression type. Otherwise this must specify the
            compression type (e.g. 'gz'). See `xphyle.compression` for
            details. Note that compression will *not* be guessed for
            '-' (stdin).
        use_system: Whether to attempt to use system-level compression
            programs.
        context_wrapper: If True, the file is wrapped in a `FileLikeWrapper`
            subclass before returning (`FileWrapper` for files/URLs,
            `StdWrapper` for STDIN/STDOUT/STDERR). If None, the default value
            (set using :method:`configure`) is used.
        file_type: a FileType; explicitly specify the file type. By default the
            file type is detected, but auto-detection might make mistakes, e.g.
            a local file contains a colon (':') in the name.
        validate: Ensure that the user-specified compression format matches the
            format guessed from the file extension or magic bytes.
        kwargs: Additional keyword arguments to pass to ``open``.
    
    `path` is interpreted as follows:
        * If starts with '|', it is assumed to be a system command
        * If a file-like object, it is used as-is
        * If one of STDIN, STDOUT, STDERR, the appropriate `sys` stream is used
        * If parseable by `xphyle.urls.parse_url()`, it is assumed to be a URL
        * If file_type == FileType.BUFFER and path is a string or bytes and
          mode is readable, a new StringIO/BytesIO is created with 'path' passed
          to its constructor.
        * Otherwise it is assumed to be a local file
    
    If `use_system` is True and the file is compressed, the file is opened with
    a pipe to the system-level compression program (e.g. ``gzip`` for '.gz'
    files) if possible, otherwise the corresponding python library is used.
    
    Returns:
        A Process if `file_type` is PROCESS, or if `file_type` is None and
        `path` starts with '|'. Otherwise, an opened file-like object. If
        `context_wrapper` is True, this will be a subclass of `FileLikeWrapper`.
    
    Raises:
        ValueError if:
            * ``compression`` is True and compression format cannot be
            determined
            * the specified compression format is invalid
            * ``validate`` is True and the specified compression format is not
                the acutal format of the file
            * the path or mode are invalid
    """
    if compression and isinstance(compression, str):
        cannonical_fmt_name = FORMATS.get_compression_format_name(compression)
        if cannonical_fmt_name is None:
            raise ValueError(
                "Invalid compression format: {}".format(compression))
        else:
            compression = cannonical_fmt_name

    # Whether the file object is stdin/stdout/stderr
    is_std = path in (STDIN, STDOUT, STDERR)
    # Whether path is a string or fileobj
    is_str = isinstance(path, str)
    # Whether path is a class indicating a buffer type
    is_buffer = path in (str, bytes)

    if not file_type:
        if is_std:
            file_type = FileType.STDIO
        elif is_buffer:
            file_type = FileType.BUFFER
        elif not is_str:
            file_type = FileType.FILELIKE
        elif path.startswith('|'):
            file_type = FileType.PROCESS
    elif file_type == FileType.BUFFER and (is_str or isinstance(path, bytes)):
        if not mode:
            mode = FileMode(access='r', coding='t' if is_str else 'b')
        is_buffer = True
    elif (is_str == (file_type is FileType.FILELIKE) or is_std !=
          (file_type is FileType.STDIO) or is_buffer !=
          (file_type is FileType.BUFFER)):
        raise ValueError("file_type = {} does not match path {}".format(
            file_type, path))

    if file_type in (FileType.URL, None):
        url_parts = parse_url(path)
        if not file_type:
            file_type = FileType.URL if url_parts else FileType.LOCAL
        elif not url_parts:
            raise ValueError("{} is not a valid URL".format(path))

    if not mode:
        # set to default
        if not is_buffer:
            mode = FileMode()
        elif path == str:
            mode = FileMode('wt')
        else:
            mode = FileMode('wb')
    elif isinstance(mode, str):
        if ('U' in mode and 'newline' in kwargs
                and kwargs['newline'] is not None):
            raise ValueError(
                "newline={} not compatible with universal newlines ('U') "
                "mode".format(kwargs['newline']))
        mode = FileMode(mode)

    if context_wrapper is None:
        context_wrapper = DEFAULTS['xopen_context_wrapper']

    # Return early if opening a process
    if file_type is FileType.PROCESS:
        if path.startswith('|'):
            path = path[1:]
        popen_args = dict(kwargs)
        for std in ('stdin', 'stdout', 'stderr'):
            popen_args[std] = PIPE
        if mode.writable:
            if compression is True:
                raise ValueError(
                    "Can determine compression automatically when writing to "
                    "process stdin")
            elif compression is None:
                compression = False
            target = 'stdin'
        else:
            target = 'stdout'
        popen_args[target] = dict(mode=mode,
                                  compression=compression,
                                  validate=validate,
                                  context_wrapper=context_wrapper)
        return popen(path, **popen_args)

    if file_type is FileType.BUFFER:
        if path == str:
            path = io.StringIO()
        elif path == bytes:
            path = io.BytesIO()
        elif is_str or isinstance(path, bytes):
            if not mode.readable:
                raise ValueError(
                    "'mode' must be readable when 'file_type' == BUFFER "
                    "and 'path' is string or bytes.")
            if is_str:
                if mode.coding != ModeCoding.TEXT:
                    raise ValueError("Must use text mode with a string buffer")
                path = io.StringIO(path)
            else:
                if mode.coding != ModeCoding.BINARY:
                    raise ValueError(
                        "Must use binary mode with a bytes buffer")
                path = io.BytesIO(path)
        if context_wrapper:
            buffer = path
        if not mode.readable:
            if compression is True:
                raise ValueError(
                    "Cannot guess compression for a write-only buffer")
            elif compression is None:
                compression = False
            validate = False

    # The file handle we will open
    # TODO: figure out the right type
    fileobj = None  # type: Any
    # The name to use for the file
    name = None
    # Guessed compression type, if compression in (None, True)
    guess = None
    # Whether to try and guess file format
    guess_format = compression in (None, True)
    # Whether to validate that the actually compression format matches expected
    validate = validate and bool(compression) and not guess_format

    if file_type is FileType.STDIO:
        use_system = False
        if path == STDERR:
            if not mode.writable:
                raise ValueError("Mode must be writable for stderr")
            stdobj = sys.stderr
        else:
            stdobj = sys.stdin if mode.readable else sys.stdout
        # get the underlying binary stream
        fileobj = stdobj.buffer
        if mode.readable and (validate or guess_format):
            if not hasattr(fileobj, 'peek'):
                fileobj = io.BufferedReader(fileobj)
            guess = FORMATS.guess_format_from_buffer(fileobj)
        else:
            validate = False
    elif file_type in (FileType.FILELIKE, FileType.BUFFER):
        fileobj = path
        use_system = False

        # determine mode of fileobj
        if hasattr(fileobj, 'mode'):
            fileobj_mode = FileMode(path.mode)
        elif hasattr(fileobj, 'readable'):
            access = ModeAccess.READWRITE
            # if fileobj.readable and fileobj.writable:
            #     access = ModeAccess.READWRITE
            # elif fileobj.writable:
            #     access = ModeAccess.WRITE
            # else:
            #     access = ModeAccess.READ
            fileobj_mode = FileMode(
                access=access,
                coding='t' if hasattr(fileobj, 'encoding') else 'b')
        else:  # pragma: no-cover
            # TODO I don't think we can actually get here, but leaving for now.
            raise ValueError("Cannot determine file mode")

        # make sure modes are compatible
        if not ((mode.readable and fileobj_mode.readable) or
                (mode.writable and fileobj_mode.writable)):
            raise ValueError(
                "mode {} and file mode {} are not compatible".format(
                    mode, fileobj_mode))

        # compression/decompression only possible for binary files
        if fileobj_mode.text:
            if compression:
                raise ValueError(
                    "Cannot compress to/decompress from a text-mode "
                    "file/buffer")
            else:
                guess_format = False
        elif validate or guess_format:
            if mode.readable:
                if not hasattr(fileobj, 'peek'):
                    fileobj = io.BufferedReader(fileobj)
                guess = FORMATS.guess_format_from_buffer(fileobj)
            elif hasattr(fileobj, 'name') and isinstance(fileobj.name, str):
                guess = FORMATS.guess_compression_format(fileobj.name)
            else:
                raise ValueError(
                    "Could not guess compression format from {}".format(path))
    elif file_type is FileType.URL:
        if not mode.readable:
            raise ValueError("URLs can only be opened in read mode")

        fileobj = open_url(path)
        if not fileobj:
            raise ValueError("Could not open URL {}".format(path))

        use_system = False
        name = get_url_file_name(fileobj, url_parts)

        # Get compression format if not specified
        if validate or guess_format:
            guess = FORMATS.guess_format_from_buffer(fileobj)
            # The following code is never used, unless there is some
            # scenario in which the file type cannot be guessed from
            # the header bytes. I'll leave this here for now but keep
            # it commented out until someone provides an example of
            # why it's necessary.
            # if guess is None and guess_format:
            #     # Check if the MIME type indicates that the file is
            #     # compressed
            #     mime = get_url_mime_type(fileobj)
            #     if mime:
            # TODO: look at this https://github.com/dbtsai/python-mimeparse
            # or similar for mime parsing
            #         guess = get_format_for_mime_type(mime)
            #     # Try to guess from the file name
            #     if not guess and name:
            #         guess = guess_file_format(name)
    elif file_type is FileType.LOCAL:
        if mode.readable:
            path = check_readable_file(path)
            if validate or guess_format:
                guess = FORMATS.guess_format_from_file_header(path)
        else:
            path = check_writable_file(path)
            if validate or guess_format:
                guess = FORMATS.guess_compression_format(path)

    if validate and guess != compression:
        raise ValueError(
            "Acutal compression format {} does not match expected "
            "format {}".format(guess, compression))
    elif guess:
        compression = guess
    elif compression is True:
        raise ValueError(
            "Could not guess compression format from {}".format(path))

    if compression:
        fmt = FORMATS.get_compression_format(str(compression))
        compression = fmt.name
        fileobj = fmt.open_file(fileobj or path,
                                mode,
                                use_system=use_system,
                                **kwargs)
        is_std = False
    elif not fileobj:
        fileobj = open(path, mode.value, **kwargs)
    elif mode.text and (is_std or (file_type is FileType.FILELIKE
                                   and not fileobj_mode.text)):
        fileobj = io.TextIOWrapper(fileobj)
        fileobj.mode = mode.value

    if context_wrapper:
        if is_std:
            fileobj = StdWrapper(fileobj, compression=compression)
        elif file_type == FileType.BUFFER:
            fileobj = BufferWrapper(fileobj, buffer, compression=compression)
        else:
            fileobj = FileWrapper(fileobj,
                                  name=name,
                                  mode=mode,
                                  compression=compression)

    return fileobj