def open_file_python(self, path_or_file: PathOrFile, mode: ModeArg,
                     **kwargs) -> FileLike:
    """Open a file with ``self.lib.BZ2File``.

    Args:
        path_or_file: The file to open -- a path or open file object.
        mode: The file open mode, given as a string or a ``FileMode``.
        kwargs: Additional arguments to pass to ``BZ2File``.

    Returns:
        For non-text modes, the ``BZ2File`` itself. For text modes, a
        ``BZ2File`` opened with only the access part of the mode and
        wrapped in an ``io.TextIOWrapper``.
    """
    file_mode = FileMode(mode) if isinstance(mode, str) else mode
    if not file_mode.text:
        return self.lib.BZ2File(path_or_file, file_mode.value, **kwargs)
    # In text mode BZ2File only receives the access component of the mode;
    # text decoding is layered on top by the wrapper.
    raw_file = self.lib.BZ2File(
        path_or_file, file_mode.access.value, **kwargs)
    return io.TextIOWrapper(raw_file)
def open_file(self, path: str, mode: ModeArg, use_system: bool = True,
              **kwargs) -> FileLike:
    """Opens a compressed file for reading or writing.

    If ``use_system`` is True and the system provides an accessible
    executable, then system-level compression is used. Otherwise defaults
    to using the python implementation.

    Reading is served by the *decompression* executable (it runs the
    ``'d'`` command) and writing by the *compression* executable (it runs
    the ``'c'`` command).

    Args:
        path: The path of the file to open.
        mode: The file open mode.
        use_system: Whether to attempt to use system-level compression.
        kwargs: Additional arguments to pass to the python-level open
            method, if system-level compression isn't used.

    Returns:
        A file-like object.
    """
    if isinstance(mode, str):
        mode = FileMode(mode)

    if use_system:
        # pylint: disable=redefined-variable-type
        gzfile = None  # type: FileLikeInterface
        if mode.readable and self.can_use_system_decompression:
            gzfile = SystemReader(
                self.decompress_path, path,
                self.get_command('d', src=path), self.decompress_name)
        elif not mode.readable and self.can_use_system_compression:
            # The system process always exchanges bytes; text handling (if
            # requested) is layered on top below.
            bin_mode = FileMode(access=mode.access, coding=ModeCoding.BINARY)
            gzfile = SystemWriter(
                self.compress_path, path, bin_mode,
                self.get_command('c'), self.compress_name)
        if gzfile:
            if mode.text:
                return io.TextIOWrapper(gzfile)
            return gzfile

    # No usable system executable (or use_system is False).
    return self.open_file_python(path, mode, **kwargs)
def open_file_python(self, path_or_file: PathOrFile, mode: ModeArg,
                     **kwargs) -> FileLike:
    """Open a file with ``self.lib.open``, buffering binary streams.

    Args:
        path_or_file: The file to open -- a path or open file object.
        mode: The file open mode, given as a string or a ``FileMode``.
        kwargs: Additional arguments to pass to the open method.

    Returns:
        The object returned by ``self.lib.open``. In binary mode it is
        wrapped in an ``io.BufferedReader`` (readable modes) or an
        ``io.BufferedWriter`` (all other modes).
    """
    # pylint: disable=redefined-variable-type
    file_mode = FileMode(mode) if isinstance(mode, str) else mode
    handle = self.lib.open(path_or_file, file_mode.value, **kwargs)
    if not file_mode.binary:
        return handle
    buffered_cls = (
        io.BufferedReader if file_mode.readable else io.BufferedWriter)
    return buffered_cls(handle)
def open_file_python(self, path_or_file: PathOrFile, mode: ModeArg,
                     **kwargs) -> FileLike:
    """Open a file using the python library (``self.lib.open``).

    Args:
        path_or_file: The file to open -- a path or open file object.
        mode: The file open mode, given as a string or a ``FileMode``.
        kwargs: Additional arguments to pass to the open method.

    Returns:
        A file-like object.
    """
    file_mode = FileMode(mode) if isinstance(mode, str) else mode
    return self.lib.open(path_or_file, file_mode.value, **kwargs)
def peek(self, size: int = 1) -> AnyChar:
    """Return bytes/characters from the stream without advancing the
    position. At most one single read on the raw stream is done to satisfy
    the call.

    Args:
        size: The max number of bytes/characters to return.

    Returns:
        At most `size` bytes/characters. Unlike io.BufferedReader.peek(),
        will never return more than `size` bytes/characters.

    Raises:
        IOError: If the wrapped file is not readable, or if it offers
            neither ``peek()`` nor ``seek()``.

    Notes:
        If the file uses multi-byte encoding and N characters are desired,
        it is up to the caller to request `size=2N`.
    """
    if not FileMode(self._fileobj.mode).readable:
        raise IOError("Can only call peek() on a readable file")

    source = self._fileobj
    if hasattr(source, "peek"):
        # Delegate to the native peek(), then trim: it is allowed to hand
        # back more than was asked for.
        data = source.peek(size)
        if len(data) > size:
            data = data[:size]
    elif hasattr(source, "seek"):
        # Emulate peek() with read-then-rewind; the rewind happens even if
        # the read raises.
        position = source.tell()
        try:
            data = source.read(size)
        finally:
            source.seek(position)
    else:  # pragma: no-cover
        # TODO I don't think it's possible to get here, but leaving for now
        raise IOError("Unpeekable file: {}".format(self.name))
    return data
def __init__(self, files: FilesArg = None, access: ModeAccessArg = 'w',
             char_mode: CharMode = None, linesep: CharMode = None,
             encoding: str = 'utf-8', header: CharMode = None) -> None:
    """Initialize the output and optionally register ``files``.

    Args:
        files: Files to add via ``add_all``; skipped when falsy.
        access: The file access mode, forwarded to ``FileMode``.
        char_mode: ``TextMode`` selects text coding for the file mode;
            any other value selects binary coding. ``BinMode`` makes the
            empty value and the default separator ``bytes``.
        linesep: The line separator. When None, ``os.linesep`` is used
            (encoded with ``encoding`` when ``char_mode`` is ``BinMode``).
        encoding: The character encoding.
        header: Passed through to the parent initializer.
    """
    super().__init__(
        mode=FileMode(
            access=access, coding='t' if char_mode == TextMode else 'b'),
        header=header)
    self.access = access
    self.char_mode = char_mode  # type: CharMode
    self._empty = cast(
        CharMode, b'' if char_mode == BinMode else '')  # type: CharMode
    self.encoding = encoding  # type: str
    self.num_lines = 0  # type: int
    if linesep is None:
        # The signature advertises None as the default, but len(None)
        # below would raise TypeError. Fall back to the platform separator,
        # typed the same way as `_empty`.
        import os
        linesep = cast(
            CharMode,
            os.linesep.encode(encoding) if char_mode == BinMode
            else os.linesep)
    self.linesep = linesep  # type: CharMode
    self._linesep_len = len(linesep)  # type: int
    if files:
        self.add_all(files)
def __init__(self, executable_path: PathLike, path: PathLike,
             mode: ModeArg = 'w', command: List[str] = None,
             executable_name: str = None) -> None:
    """Start a system process whose stdout is written to ``path``.

    Args:
        executable_path: Path to the executable; its basename is used as
            the executable name when ``executable_name`` is not given.
        path: The output file, opened with ``mode``.
        mode: The file open mode, given as a string or a ``FileMode``.
        command: The command to run; defaults to ``[executable_name]``.
        executable_name: Display name of the executable.

    The process is started with a pipe on stdin, ``path`` as stdout and
    ``os.devnull`` as stderr. If anything fails after the output file has
    been opened, every file opened here is closed before the exception
    propagates.
    """
    super().__init__(path)
    self.executable_name = (
        executable_name or os.path.basename(str(executable_path)))
    self.command = command or [self.executable_name]
    if isinstance(mode, str):
        mode = FileMode(mode)
    self.outfile = open(str(path), mode.value)
    try:
        self.devnull = open(os.devnull, 'w')
        try:
            self.process = Popen(
                self.command, stdin=PIPE, stdout=self.outfile,
                stderr=self.devnull)
        except BaseException:  # pragma: no-cover
            # Cleanup-and-reraise only: never swallow the error.
            self.devnull.close()
            raise
    except BaseException:  # pragma: no-cover
        self.outfile.close()
        raise
def __init__(
    self,
    files: FilesArg = None,
    access: ModeAccessArg = "w",
    char_mode: Optional[CharMode] = None,
    linesep: Optional[CharMode] = None,
    encoding: str = "utf-8",
    header: Optional[CharMode] = None,
) -> None:
    """Initialize the output and optionally register ``files``.

    Args:
        files: Files to add via ``add_all``; skipped when falsy.
        access: The file access mode, forwarded to ``FileMode``.
        char_mode: ``TextMode`` selects text coding for the file mode;
            any other value selects binary coding. ``BinMode`` makes the
            empty value and the default separator ``bytes``.
        linesep: The line separator. When None, ``os.linesep`` is used
            (encoded with ``encoding`` when ``char_mode`` is ``BinMode``).
        encoding: The character encoding.
        header: Passed through to the parent initializer.
    """
    super().__init__(
        mode=FileMode(access=access, coding="t" if char_mode == TextMode else "b"),
        header=header,
    )
    self.access = access
    self.char_mode: CharMode = char_mode
    self._empty: CharMode = cast(CharMode, b"" if char_mode == BinMode else "")
    self.encoding: str = encoding
    self.num_lines: int = 0
    if linesep is None:
        # The signature advertises None as the default, but len(None)
        # below would raise TypeError. Fall back to the platform separator,
        # typed the same way as `_empty`.
        import os

        linesep = cast(
            CharMode,
            os.linesep.encode(encoding) if char_mode == BinMode else os.linesep,
        )
    self.linesep: CharMode = linesep
    self._linesep_len: int = len(linesep)
    if files:
        self.add_all(files)
def linecount(path_or_file: PathOrFile, linesep: Optional[bytes] = None,
              buffer_size: int = 1024 * 1024, **kwargs) -> int:
    """Fastest pythonic way to count the lines in a file.

    The file is read in chunks of ``buffer_size`` bytes. A multi-byte
    separator (e.g. ``b'\\r\\n'``) that straddles two chunks is still
    counted, because the last ``len(linesep) - 1`` bytes of each chunk are
    carried over to the next one. The carry-over is exact for separators
    that cannot overlap themselves, which covers every conventional line
    ending.

    Args:
        path_or_file: File object, or path to the file.
        linesep: Line delimiter, specified as a byte string (e.g. b'\\n').
            Defaults to ``os.linesep`` encoded as bytes.
        buffer_size: How many bytes to read at a time (1 MiB by default).
        kwargs: Additional arguments to pass to the file open method.

    Returns:
        The number of lines in the file. Blank lines (including the last
        line in the file) are included. 0 for an empty file, and -1 if the
        open method yields None.

    Raises:
        ValueError: If ``buffer_size`` is less than 1, or if a ``mode``
            other than 'rb' is given in ``kwargs``.
    """
    if buffer_size < 1:
        raise ValueError("'buffer_size' must be >= 1")
    if linesep is None:
        linesep = os.linesep.encode()
    if "mode" not in kwargs:
        kwargs["mode"] = "rb"
    elif FileMode(kwargs["mode"]).value != "rb":
        raise ValueError("File must be opened with mode 'rb'")

    # Number of trailing bytes that could be the start of a separator
    # completed by the next chunk. Zero for single-byte separators, in
    # which case no data is ever copied.
    overlap = len(linesep) - 1

    with open_(path_or_file, **kwargs) as fileobj:
        if fileobj is None:
            return -1
        read_f = fileobj.read  # loop optimization
        buf = read_f(buffer_size)
        if len(buf) == 0:  # undefined file case
            return 0
        lines = 1
        tail = b""
        while buf:
            window = tail + buf if tail else buf
            lines += window.count(linesep)
            # The tail is shorter than the separator, so it can never hold
            # a complete (already counted) match on its own.
            tail = window[-overlap:] if overlap > 0 else b""
            buf = read_f(buffer_size)
    return lines
def xopen(
    target: OpenArg,
    mode: ModeArg = None,
    compression: CompressionArg = None,
    use_system: bool = True,
    allow_subprocesses: bool = True,
    context_wrapper: bool = None,
    file_type: FileType = None,
    validate: bool = True,
    overwrite: bool = True,
    close_fileobj: bool = True,
    **kwargs,
) -> FileLike:
    """
    Replacement for the builtin `open` function that can also open URLs and
    subprocesses, and automatically handles compressed files.

    Args:
        target: A relative or absolute path, a URL, a system command, a
            file-like object, or :class:`bytes` or :class:`str` to
            indicate a writeable byte/string buffer.
        mode: Some combination of the access mode ('r', 'w', 'a', or 'x')
            and the open mode ('b' or 't'). If the latter is not given, 't'
            is used by default.
        compression: If None or True, compression type (if any) will be
            determined automatically. If False, no attempt will be made to
            determine compression type. Otherwise this must specify the
            compression type (e.g. 'gz'). See `xphyle.compression` for
            details. Note that compression will *not* be guessed for
            '-' (stdin).
        use_system: Whether to attempt to use system-level compression
            programs.
        allow_subprocesses: Whether to allow `target` to be a subprocess
            (e.g. '|cat'). There are security risks associated with
            allowing users to run arbitrary system commands.
        context_wrapper: If True, the file is wrapped in a `FileLikeWrapper`
            subclass before returning (`FileWrapper` for files/URLs,
            `StdWrapper` for STDIN/STDOUT/STDERR). If None, the default
            value (set using :method:`configure`) is used.
        file_type: a FileType; explicitly specify the file type. By default
            the file type is detected, but auto-detection might make
            mistakes, e.g. a local file contains a colon (':') in the name.
        validate: Ensure that the user-specified compression format matches
            the format guessed from the file extension or magic bytes.
        overwrite: For files opened in write mode, whether to overwrite
            existing files (True).
        close_fileobj: When `target` is a file-like object / `file_type` is
            FileType.FILELIKE, and `context_wrapper` is True, whether to
            close the underlying file when closing the wrapper.
        kwargs: Additional keyword arguments to pass to ``open``.

    `target` is interpreted as follows:
        * If starts with '|', it is assumed to be a system command
        * If a file-like object, it is used as-is
        * If one of STDIN, STDOUT, STDERR, the appropriate `sys` stream is
          used
        * If parseable by `xphyle.urls.parse_url()`, it is assumed to be a
          URL
        * If file_type == FileType.BUFFER and target is a string or bytes
          and mode is readable, a new StringIO/BytesIO is created with
          'target' passed to its constructor.
        * Otherwise it is assumed to be a local file

    If `use_system` is True and the file is compressed, the file is opened
    with a pipe to the system-level compression program (e.g. ``gzip`` for
    '.gz' files) if possible, otherwise the corresponding python library is
    used.

    Returns:
        A Process if `file_type` is PROCESS, or if `file_type` is None and
        `target` starts with '|'. Otherwise, an opened file-like object. If
        `context_wrapper` is True, this will be a subclass of
        `FileLikeWrapper`.

    Raises:
        ValueError if:
            * ``compression`` is True and compression format cannot be
              determined
            * the specified compression format is invalid
            * ``validate`` is True and the specified compression format is
              not the actual format of the file
            * the target or mode are invalid
            * ``target`` is a subprocess and ``allow_subprocesses`` is
              False
            * ``overwrite`` is False and the local file to be written
              already exists
    """
    if compression and isinstance(compression, str):
        cannonical_fmt_name = FORMATS.get_compression_format_name(compression)
        if cannonical_fmt_name is None:
            raise ValueError(
                "Invalid compression format: {}".format(compression))
        else:
            compression = cannonical_fmt_name

    # Convert placeholder strings ("-", "_") to paths
    target = convert_std_placeholder(target, mode)

    # Whether the file object is stdin/stdout/stderr
    is_std = target in (STDIN, STDOUT, STDERR)
    # Whether 'target' is currently a file-like object in binary mode
    is_bin = False
    # Whether target is a string
    is_str = isinstance(target, str)
    # Whether target is a Path
    is_path = not is_std and isinstance(target, PurePath)
    # Whether target is a class indicating a buffer type
    is_buffer = target in (str, bytes)

    if not file_type:
        # Anything not classified here (plain strings that are not
        # commands) is resolved to URL or LOCAL further down.
        if is_path:
            file_type = FileType.LOCAL
        elif is_std:
            file_type = FileType.STDIO
        elif is_buffer:
            file_type = FileType.BUFFER
        elif not is_str:
            file_type = FileType.FILELIKE
        elif target.startswith("|"):
            file_type = FileType.PROCESS
    elif file_type == FileType.BUFFER and (
            is_str or is_path or isinstance(target, bytes)):
        if not mode:
            mode = FileMode(access="r", coding="t" if is_str else "b")
        is_buffer = True
    elif ((is_str or is_path or is_buffer) == (file_type is FileType.FILELIKE)
          or is_std != (file_type is FileType.STDIO)
          or is_buffer != (file_type is FileType.BUFFER)):
        raise ValueError(
            f"file_type = {file_type} does not match target {target}")

    url_parts = None
    if file_type in (FileType.URL, None):
        url_parts = parse_url(target)
        if not file_type:
            file_type = FileType.URL if url_parts else FileType.LOCAL
        elif not url_parts:
            raise ValueError(f"{target} is not a valid URL")

    if not mode:
        # set to default
        if not is_buffer:
            mode = FileMode()
        elif target == str:
            mode = FileMode("wt")
        else:
            mode = FileMode("wb")
    elif isinstance(mode, str):
        if "U" in mode and "newline" in kwargs and kwargs[
                "newline"] is not None:
            raise ValueError(
                "newline={} not compatible with universal newlines ('U') "
                "mode".format(kwargs["newline"]))
        mode = FileMode(mode)

    if context_wrapper is None:
        context_wrapper = DEFAULTS["xopen_context_wrapper"]

    # Return early if opening a process
    if file_type is FileType.PROCESS:
        if not allow_subprocesses:
            raise ValueError("Subprocesses are disallowed")
        if target.startswith("|"):
            target = target[1:]
        popen_args = dict(kwargs)
        for std in ("stdin", "stdout", "stderr"):
            popen_args[std] = PIPE
        if mode.writable:
            if compression is True:
                raise ValueError(
                    "Cannot determine compression automatically when "
                    "writing to process stdin")
            elif compression is None:
                compression = False
            outstream = "stdin"
        else:
            outstream = "stdout"
        # The stream being read from/written to gets the xopen settings;
        # the other two stay plain pipes.
        popen_args[outstream] = dict(
            mode=mode,
            compression=compression,
            validate=validate,
            context_wrapper=context_wrapper,
        )
        return popen(target, **popen_args)

    buffer = None
    if file_type is FileType.BUFFER:
        if target == str:
            target = io.StringIO()
        elif target == bytes:
            target = io.BytesIO()
            is_bin = True
        elif is_str or isinstance(target, bytes):
            if not mode.readable:
                raise ValueError(
                    "'mode' must be readable when 'file_type' == BUFFER "
                    "and 'target' is string or bytes.")
            if is_str:
                if mode.coding != ModeCoding.TEXT:
                    raise ValueError("Must use text mode with a string buffer")
                target = io.StringIO(target)
            else:
                if mode.coding != ModeCoding.BINARY:
                    raise ValueError(
                        "Must use binary mode with a bytes buffer")
                target = io.BytesIO(target)
                is_bin = True
        if context_wrapper:
            # Keep a handle on the raw buffer for the BufferWrapper below.
            buffer = target
        if not mode.readable:
            if compression is True:
                raise ValueError(
                    "Cannot guess compression for a write-only buffer")
            elif compression is None:
                compression = False
            validate = False

    # The file handle we will open
    # TODO: figure out the right type
    fileobj: Any = None
    # The name to use for the file
    name = None
    # Guessed compression type, if compression in (None, True)
    guess = None
    # Whether to try and guess file format
    guess_format = compression in (None, True)
    # Whether to validate that the actual compression format matches expected
    validate = validate and bool(compression) and not guess_format

    if file_type is FileType.STDIO:
        use_system = False
        if target == STDERR:
            if not mode.writable:
                raise ValueError("Mode must be writable for stderr")
            stdobj = sys.stderr
        else:
            stdobj = sys.stdin if mode.readable else sys.stdout
        # whether we need the underlying byte stream regardless of the mode
        check_readable = mode.readable and (validate or guess_format)
        if mode.binary or compression or check_readable:
            # get the underlying binary stream
            fileobj = stdobj.buffer
            is_bin = True
        else:
            fileobj = stdobj
        if check_readable:
            if not hasattr(fileobj, "peek"):
                fileobj = io.BufferedReader(fileobj)
            guess = FORMATS.guess_format_from_buffer(fileobj)
        else:
            validate = False
    elif file_type in (FileType.FILELIKE, FileType.BUFFER):
        fileobj = target
        use_system = False

        # determine mode of fileobj
        if hasattr(fileobj, "mode"):
            fileobj_mode = FileMode(target.mode)
        elif hasattr(fileobj, "readable"):
            # There is no `mode` attribute to inspect, so the object is
            # treated as read-write; the coding is inferred from whether
            # it has an `encoding` attribute.
            access = ModeAccess.READWRITE
            fileobj_mode = FileMode(
                access=access,
                coding="t" if hasattr(fileobj, "encoding") else "b")
        else:  # pragma: no-cover
            # TODO I don't think we can actually get here, but leaving for now.
            raise ValueError("Cannot determine file mode")

        # make sure modes are compatible
        if not ((mode.readable and fileobj_mode.readable) or
                (mode.writable and fileobj_mode.writable)):
            raise ValueError(
                "mode {} and file mode {} are not compatible".format(
                    mode, fileobj_mode))

        # compression/decompression only possible for binary files
        is_bin = fileobj_mode.binary
        if not is_bin:
            if compression:
                raise ValueError(
                    "Cannot compress to/decompress from a text-mode "
                    "file/buffer")
            else:
                # noinspection PyUnusedLocal
                guess_format = False
        elif validate or guess_format:
            if mode.readable:
                if not hasattr(fileobj, "peek"):
                    fileobj = io.BufferedReader(fileobj)
                guess = FORMATS.guess_format_from_buffer(fileobj)
            elif hasattr(fileobj, "name") and isinstance(fileobj.name, str):
                guess = FORMATS.guess_compression_format(fileobj.name)
            else:
                raise ValueError(
                    "Could not guess compression format from {}".format(
                        target))
    elif file_type is FileType.URL:
        if not mode.readable:
            raise ValueError("URLs can only be opened in read mode")

        fileobj = open_url(target)
        if not fileobj:
            raise ValueError("Could not open URL {}".format(target))

        use_system = False
        name = get_url_file_name(fileobj, url_parts)

        # Get compression format if not specified
        if validate or guess_format:
            guess = FORMATS.guess_format_from_buffer(fileobj)
            # NOTE: guessing from the MIME type or the file name is
            # deliberately not attempted here; the header bytes are the
            # only source used until a case is found where they are not
            # sufficient.
    elif file_type is FileType.LOCAL:
        if is_str:
            target = Path(target)
        if mode.readable:
            target = check_readable_file(target)
            if validate or guess_format:
                guess = FORMATS.guess_format_from_file_header(target)
        else:
            target = check_writable_file(target)
            # If overwrite=False, check that the file doesn't already exist
            if not overwrite and os.path.exists(target):
                raise ValueError("File already exists: {}".format(target))
            if validate or guess_format:
                guess = FORMATS.guess_compression_format(target)

    if validate and guess != compression:
        # TODO: this is to handle the case where the same extension can be
        # used for multiple compression formats, and we're writing a file so
        # the format cannot be detected from the header. Formats currently
        # does not support an extension being used with multiple formats.
        # Currently bgzip is the only format that has this issue.
        if not mode.readable and FORMATS.has_compatible_extension(
                compression, guess):
            pass
        else:
            raise ValueError(
                "Acutal compression format {} is not compatible with expected "
                "format {}".format(guess, compression))
    elif guess:
        compression = guess
    elif compression is True:
        raise ValueError(f"Could not guess compression format from {target}")

    if compression:
        fmt = FORMATS.get_compression_format(str(compression))
        compression = fmt.name
        fileobj = fmt.open_file(
            fileobj or target, mode, use_system=use_system, **kwargs)
        is_std = False
    elif not fileobj:
        fileobj = open(target, mode.value, **kwargs)
    elif mode.text and is_bin and (
            is_std or file_type is FileType.FILELIKE):
        # A binary stream was obtained but text was requested.
        fileobj = io.TextIOWrapper(fileobj)
        fileobj.mode = mode.value

    if context_wrapper:
        if is_std:
            fileobj = StdWrapper(fileobj, compression=compression)
        elif file_type == FileType.BUFFER:
            fileobj = BufferWrapper(
                fileobj, buffer, compression=compression,
                close_fileobj=close_fileobj)
        else:
            fileobj = FileWrapper(
                fileobj,
                name=name,
                mode=mode,
                compression=compression,
                close_fileobj=close_fileobj,
            )

    return fileobj
def xopen(
        path,  #: OpenArg,
        mode: ModeArg = None,
        compression: CompressionArg = None,
        use_system: bool = True,
        context_wrapper: bool = None,
        file_type: FileType = None,
        validate: bool = True,
        **kwargs) -> FileLike:
    """
    Replacement for the builtin `open` function that can also open URLs and
    subprocesses, and automatically handles compressed files.

    Args:
        path: A relative or absolute path, a URL, a system command, a
            file-like object, or :class:`bytes` or :class:`str` to
            indicate a writeable byte/string buffer.
        mode: Some combination of the access mode ('r', 'w', 'a', or 'x')
            and the open mode ('b' or 't'). If the latter is not given, 't'
            is used by default.
        compression: If None or True, compression type (if any) will be
            determined automatically. If False, no attempt will be made to
            determine compression type. Otherwise this must specify the
            compression type (e.g. 'gz'). See `xphyle.compression` for
            details. Note that compression will *not* be guessed for
            '-' (stdin).
        use_system: Whether to attempt to use system-level compression
            programs.
        context_wrapper: If True, the file is wrapped in a `FileLikeWrapper`
            subclass before returning (`FileWrapper` for files/URLs,
            `StdWrapper` for STDIN/STDOUT/STDERR). If None, the default
            value (set using :method:`configure`) is used.
        file_type: a FileType; explicitly specify the file type. By default
            the file type is detected, but auto-detection might make
            mistakes, e.g. a local file contains a colon (':') in the name.
        validate: Ensure that the user-specified compression format matches
            the format guessed from the file extension or magic bytes.
        kwargs: Additional keyword arguments to pass to ``open``.

    `path` is interpreted as follows:
        * If starts with '|', it is assumed to be a system command
        * If a file-like object, it is used as-is
        * If one of STDIN, STDOUT, STDERR, the appropriate `sys` stream is
          used
        * If parseable by `xphyle.urls.parse_url()`, it is assumed to be a
          URL
        * If file_type == FileType.BUFFER and path is a string or bytes and
          mode is readable, a new StringIO/BytesIO is created with 'path'
          passed to its constructor.
        * Otherwise it is assumed to be a local file

    If `use_system` is True and the file is compressed, the file is opened
    with a pipe to the system-level compression program (e.g. ``gzip`` for
    '.gz' files) if possible, otherwise the corresponding python library is
    used.

    Returns:
        A Process if `file_type` is PROCESS, or if `file_type` is None and
        `path` starts with '|'. Otherwise, an opened file-like object. If
        `context_wrapper` is True, this will be a subclass of
        `FileLikeWrapper`.

    Raises:
        ValueError if:
            * ``compression`` is True and compression format cannot be
              determined
            * the specified compression format is invalid
            * ``validate`` is True and the specified compression format is
              not the actual format of the file
            * the path or mode are invalid
    """
    if compression and isinstance(compression, str):
        cannonical_fmt_name = FORMATS.get_compression_format_name(compression)
        if cannonical_fmt_name is None:
            raise ValueError(
                "Invalid compression format: {}".format(compression))
        else:
            compression = cannonical_fmt_name

    # Whether the file object is stdin/stdout/stderr
    is_std = path in (STDIN, STDOUT, STDERR)
    # Whether path is a string or fileobj
    is_str = isinstance(path, str)
    # Whether path is a class indicating a buffer type
    is_buffer = path in (str, bytes)

    if not file_type:
        # Anything not classified here (plain strings that are not
        # commands) is resolved to URL or LOCAL further down.
        if is_std:
            file_type = FileType.STDIO
        elif is_buffer:
            file_type = FileType.BUFFER
        elif not is_str:
            file_type = FileType.FILELIKE
        elif path.startswith('|'):
            file_type = FileType.PROCESS
    elif file_type == FileType.BUFFER and (is_str or isinstance(path, bytes)):
        if not mode:
            mode = FileMode(access='r', coding='t' if is_str else 'b')
        is_buffer = True
    elif (is_str == (file_type is FileType.FILELIKE) or
          is_std != (file_type is FileType.STDIO) or
          is_buffer != (file_type is FileType.BUFFER)):
        raise ValueError("file_type = {} does not match path {}".format(
            file_type, path))

    if file_type in (FileType.URL, None):
        url_parts = parse_url(path)
        if not file_type:
            file_type = FileType.URL if url_parts else FileType.LOCAL
        elif not url_parts:
            raise ValueError("{} is not a valid URL".format(path))

    if not mode:
        # set to default
        if not is_buffer:
            mode = FileMode()
        elif path == str:
            mode = FileMode('wt')
        else:
            mode = FileMode('wb')
    elif isinstance(mode, str):
        if ('U' in mode and 'newline' in kwargs and
                kwargs['newline'] is not None):
            raise ValueError(
                "newline={} not compatible with universal newlines ('U') "
                "mode".format(kwargs['newline']))
        mode = FileMode(mode)

    if context_wrapper is None:
        context_wrapper = DEFAULTS['xopen_context_wrapper']

    # Return early if opening a process
    if file_type is FileType.PROCESS:
        if path.startswith('|'):
            path = path[1:]
        popen_args = dict(kwargs)
        for std in ('stdin', 'stdout', 'stderr'):
            popen_args[std] = PIPE
        if mode.writable:
            if compression is True:
                raise ValueError(
                    "Cannot determine compression automatically when "
                    "writing to process stdin")
            elif compression is None:
                compression = False
            target = 'stdin'
        else:
            target = 'stdout'
        # The stream being read from/written to gets the xopen settings;
        # the other two stay plain pipes.
        popen_args[target] = dict(
            mode=mode, compression=compression, validate=validate,
            context_wrapper=context_wrapper)
        return popen(path, **popen_args)

    if file_type is FileType.BUFFER:
        if path == str:
            path = io.StringIO()
        elif path == bytes:
            path = io.BytesIO()
        elif is_str or isinstance(path, bytes):
            if not mode.readable:
                raise ValueError(
                    "'mode' must be readable when 'file_type' == BUFFER "
                    "and 'path' is string or bytes.")
            if is_str:
                if mode.coding != ModeCoding.TEXT:
                    raise ValueError("Must use text mode with a string buffer")
                path = io.StringIO(path)
            else:
                if mode.coding != ModeCoding.BINARY:
                    raise ValueError(
                        "Must use binary mode with a bytes buffer")
                path = io.BytesIO(path)
        if context_wrapper:
            # Keep a handle on the raw buffer for the BufferWrapper below
            # (only read there when context_wrapper is set).
            buffer = path
        if not mode.readable:
            if compression is True:
                raise ValueError(
                    "Cannot guess compression for a write-only buffer")
            elif compression is None:
                compression = False
            validate = False

    # The file handle we will open
    # TODO: figure out the right type
    fileobj = None  # type: Any
    # The name to use for the file
    name = None
    # Guessed compression type, if compression in (None, True)
    guess = None
    # Whether to try and guess file format
    guess_format = compression in (None, True)
    # Whether to validate that the actual compression format matches expected
    validate = validate and bool(compression) and not guess_format

    if file_type is FileType.STDIO:
        use_system = False
        if path == STDERR:
            if not mode.writable:
                raise ValueError("Mode must be writable for stderr")
            stdobj = sys.stderr
        else:
            stdobj = sys.stdin if mode.readable else sys.stdout
        # get the underlying binary stream
        fileobj = stdobj.buffer
        if mode.readable and (validate or guess_format):
            if not hasattr(fileobj, 'peek'):
                fileobj = io.BufferedReader(fileobj)
            guess = FORMATS.guess_format_from_buffer(fileobj)
        else:
            validate = False
    elif file_type in (FileType.FILELIKE, FileType.BUFFER):
        fileobj = path
        use_system = False

        # determine mode of fileobj
        if hasattr(fileobj, 'mode'):
            fileobj_mode = FileMode(path.mode)
        elif hasattr(fileobj, 'readable'):
            # There is no `mode` attribute to inspect, so the object is
            # treated as read-write; the coding is inferred from whether
            # it has an `encoding` attribute.
            access = ModeAccess.READWRITE
            fileobj_mode = FileMode(
                access=access,
                coding='t' if hasattr(fileobj, 'encoding') else 'b')
        else:  # pragma: no-cover
            # TODO I don't think we can actually get here, but leaving for now.
            raise ValueError("Cannot determine file mode")

        # make sure modes are compatible
        if not ((mode.readable and fileobj_mode.readable) or
                (mode.writable and fileobj_mode.writable)):
            raise ValueError(
                "mode {} and file mode {} are not compatible".format(
                    mode, fileobj_mode))

        # compression/decompression only possible for binary files
        if fileobj_mode.text:
            if compression:
                raise ValueError(
                    "Cannot compress to/decompress from a text-mode "
                    "file/buffer")
            else:
                guess_format = False
        elif validate or guess_format:
            if mode.readable:
                if not hasattr(fileobj, 'peek'):
                    fileobj = io.BufferedReader(fileobj)
                guess = FORMATS.guess_format_from_buffer(fileobj)
            elif hasattr(fileobj, 'name') and isinstance(fileobj.name, str):
                guess = FORMATS.guess_compression_format(fileobj.name)
            else:
                raise ValueError(
                    "Could not guess compression format from {}".format(path))
    elif file_type is FileType.URL:
        if not mode.readable:
            raise ValueError("URLs can only be opened in read mode")

        fileobj = open_url(path)
        if not fileobj:
            raise ValueError("Could not open URL {}".format(path))

        use_system = False
        name = get_url_file_name(fileobj, url_parts)

        # Get compression format if not specified
        if validate or guess_format:
            guess = FORMATS.guess_format_from_buffer(fileobj)
            # NOTE: guessing from the MIME type or the file name is
            # deliberately not attempted here; the header bytes are the
            # only source used until a case is found where they are not
            # sufficient.
    elif file_type is FileType.LOCAL:
        if mode.readable:
            path = check_readable_file(path)
            if validate or guess_format:
                guess = FORMATS.guess_format_from_file_header(path)
        else:
            path = check_writable_file(path)
            if validate or guess_format:
                guess = FORMATS.guess_compression_format(path)

    if validate and guess != compression:
        raise ValueError(
            "Acutal compression format {} does not match expected "
            "format {}".format(guess, compression))
    elif guess:
        compression = guess
    elif compression is True:
        raise ValueError(
            "Could not guess compression format from {}".format(path))

    if compression:
        fmt = FORMATS.get_compression_format(str(compression))
        compression = fmt.name
        fileobj = fmt.open_file(
            fileobj or path, mode, use_system=use_system, **kwargs)
        is_std = False
    elif not fileobj:
        fileobj = open(path, mode.value, **kwargs)
    elif mode.text and (is_std or (
            file_type is FileType.FILELIKE and not fileobj_mode.text)):
        # A binary stream was obtained but text was requested.
        fileobj = io.TextIOWrapper(fileobj)
        fileobj.mode = mode.value

    if context_wrapper:
        if is_std:
            fileobj = StdWrapper(fileobj, compression=compression)
        elif file_type == FileType.BUFFER:
            fileobj = BufferWrapper(fileobj, buffer, compression=compression)
        else:
            fileobj = FileWrapper(
                fileobj, name=name, mode=mode, compression=compression)

    return fileobj