def get_filepath_or_buffer(
    filepath_or_buffer: FilePathOrBuffer,
    encoding: Optional[str] = None,
    compression: Optional[str] = None,
    mode: Optional[str] = None,
):
    """
    If the filepath_or_buffer is a url, translate and return the buffer.
    Otherwise passthrough.

    Parameters
    ----------
    filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path),
                         or buffer
    encoding : str, optional
        Returned unchanged for plain URLs and forwarded to the S3/GCS
        helpers; for local paths and buffers ``None`` is returned instead.
    compression : {'gzip', 'bz2', 'zip', 'xz', None}, optional
        Overridden with ``'gzip'`` when a URL response carries a
        ``Content-Encoding: gzip`` header.
    mode : str, optional
        Only forwarded to the S3/GCS helpers.

    Returns
    -------
    Tuple[FilePathOrBuffer, str, str, bool]
        Tuple containing the filepath or buffer, the encoding, the compression
        and should_close.

    Raises
    ------
    ValueError
        If the input is neither a str/bytes/mmap object nor file-like.
    """
    filepath_or_buffer = stringify_path(filepath_or_buffer)

    if isinstance(filepath_or_buffer, str) and is_url(filepath_or_buffer):
        req = urlopen(filepath_or_buffer)
        try:
            content_encoding = req.headers.get("Content-Encoding", None)
            if content_encoding == "gzip":
                # Override compression based on Content-Encoding header
                compression = "gzip"
            reader = BytesIO(req.read())
        finally:
            # Always release the connection, even if reading the body fails.
            req.close()
        return reader, encoding, compression, True

    if is_s3_url(filepath_or_buffer):
        from pandas.io import s3

        return s3.get_filepath_or_buffer(
            filepath_or_buffer, encoding=encoding, compression=compression, mode=mode
        )

    if is_gcs_url(filepath_or_buffer):
        from pandas.io import gcs

        return gcs.get_filepath_or_buffer(
            filepath_or_buffer, encoding=encoding, compression=compression, mode=mode
        )

    if isinstance(filepath_or_buffer, (str, bytes, mmap.mmap)):
        return _expand_user(filepath_or_buffer), None, compression, False

    if not is_file_like(filepath_or_buffer):
        msg = f"Invalid file path or buffer object type: {type(filepath_or_buffer)}"
        raise ValueError(msg)

    return filepath_or_buffer, None, compression, False
def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
                           compression=None, mode=None):
    """
    If the filepath_or_buffer is a url, translate and return the buffer.
    Otherwise passthrough.

    Parameters
    ----------
    filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path),
                         or buffer
    encoding : str, optional
        Returned unchanged for plain URLs and forwarded to the S3/GCS
        helpers; for local paths and buffers ``None`` is returned instead.
    compression : {'gzip', 'bz2', 'zip', 'xz', None}, optional
        Overridden with ``'gzip'`` when a URL response carries a
        ``Content-Encoding: gzip`` header.
    mode : str, optional
        Only forwarded to the S3/GCS helpers.

    Returns
    -------
    tuple of (filepath or buffer, encoding, compression, should_close)
        ``should_close`` is True only for the in-memory buffer created from
        a URL response; S3/GCS results come straight from their helpers.

    Raises
    ------
    ValueError
        If the input is neither a string/bytes/mmap object nor file-like.
    """
    filepath_or_buffer = _stringify_path(filepath_or_buffer)

    if _is_url(filepath_or_buffer):
        req = urlopen(filepath_or_buffer)
        try:
            content_encoding = req.headers.get('Content-Encoding', None)
            if content_encoding == 'gzip':
                # Override compression based on Content-Encoding header
                compression = 'gzip'
            reader = BytesIO(req.read())
        finally:
            # Always release the connection, even if reading the body fails.
            req.close()
        return reader, encoding, compression, True

    if is_s3_url(filepath_or_buffer):
        from pandas.io import s3
        return s3.get_filepath_or_buffer(filepath_or_buffer,
                                         encoding=encoding,
                                         compression=compression,
                                         mode=mode)

    if is_gcs_url(filepath_or_buffer):
        from pandas.io import gcs
        return gcs.get_filepath_or_buffer(filepath_or_buffer,
                                          encoding=encoding,
                                          compression=compression,
                                          mode=mode)

    if isinstance(filepath_or_buffer, (compat.string_types,
                                       compat.binary_type,
                                       mmap.mmap)):
        return _expand_user(filepath_or_buffer), None, compression, False

    if not is_file_like(filepath_or_buffer):
        msg = "Invalid file path or buffer object type: {_type}"
        raise ValueError(msg.format(_type=type(filepath_or_buffer)))

    return filepath_or_buffer, None, compression, False
def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
                           compression=None, mode=None):
    """
    If the filepath_or_buffer is a url, translate and return the buffer.
    Otherwise passthrough.

    Parameters
    ----------
    filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path),
                         or buffer
    encoding : str, optional
        Returned unchanged for plain URLs and forwarded to the S3/GCS
        helpers; for local paths and buffers ``None`` is returned instead.
    compression : str, optional
        Passed through to the caller; overridden with ``'gzip'`` when a URL
        response carries a ``Content-Encoding: gzip`` header.
    mode : str, optional
        Only forwarded to the S3/GCS helpers.

    Returns
    -------
    tuple of (filepath or buffer, encoding, compression, should_close)
        ``should_close`` is True only for the in-memory buffer created from
        a URL response; S3/GCS results come straight from their helpers.

    Raises
    ------
    ValueError
        If the input is neither a string/bytes/mmap object nor file-like.
    """
    filepath_or_buffer = _stringify_path(filepath_or_buffer)

    if _is_url(filepath_or_buffer):
        req = _urlopen(filepath_or_buffer)
        try:
            content_encoding = req.headers.get('Content-Encoding', None)
            if content_encoding == 'gzip':
                # Override compression based on Content-Encoding header
                compression = 'gzip'
            reader = BytesIO(req.read())
        finally:
            # Always release the connection, even if reading the body fails.
            req.close()
        return reader, encoding, compression, True

    if is_s3_url(filepath_or_buffer):
        from pandas.io import s3
        return s3.get_filepath_or_buffer(filepath_or_buffer,
                                         encoding=encoding,
                                         compression=compression,
                                         mode=mode)

    if is_gcs_url(filepath_or_buffer):
        from pandas.io import gcs
        return gcs.get_filepath_or_buffer(filepath_or_buffer,
                                          encoding=encoding,
                                          compression=compression,
                                          mode=mode)

    if isinstance(filepath_or_buffer, (compat.string_types,
                                       compat.binary_type,
                                       mmap.mmap)):
        return _expand_user(filepath_or_buffer), None, compression, False

    if not is_file_like(filepath_or_buffer):
        msg = "Invalid file path or buffer object type: {_type}"
        raise ValueError(msg.format(_type=type(filepath_or_buffer)))

    return filepath_or_buffer, None, compression, False
def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
                           compression=None):
    """
    If the filepath_or_buffer is a url, translate and return the buffer.
    Otherwise passthrough.

    Parameters
    ----------
    filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path),
                         or buffer
    encoding : str, optional
        Returned unchanged for plain URLs and forwarded to the S3 helper;
        for local paths and buffers ``None`` is returned instead.
    compression : str, optional
        Passed through to the caller; overridden with ``'gzip'`` when a URL
        response carries a ``Content-Encoding: gzip`` header.

    Returns
    -------
    a filepath_or_buffer, the encoding, the compression

    Raises
    ------
    ValueError
        If the input is neither a string/bytes/mmap object nor file-like.
    """
    if _is_url(filepath_or_buffer):
        url = str(filepath_or_buffer)
        req = _urlopen(url)
        try:
            content_encoding = req.headers.get('Content-Encoding', None)
            if content_encoding == 'gzip':
                # Override compression based on Content-Encoding header
                compression = 'gzip'
            reader = BytesIO(req.read())
        finally:
            # The body has been copied into memory (or reading failed), so
            # the response is no longer needed; close it to avoid leaking
            # the underlying connection.
            req.close()
        return reader, encoding, compression

    if _is_s3_url(filepath_or_buffer):
        from pandas.io import s3
        return s3.get_filepath_or_buffer(filepath_or_buffer,
                                         encoding=encoding,
                                         compression=compression)

    # Convert pathlib.Path/py.path.local or string
    filepath_or_buffer = _stringify_path(filepath_or_buffer)

    if isinstance(filepath_or_buffer, (compat.string_types,
                                       compat.binary_type,
                                       mmap.mmap)):
        return _expand_user(filepath_or_buffer), None, compression

    if not is_file_like(filepath_or_buffer):
        msg = "Invalid file path or buffer object type: {_type}"
        raise ValueError(msg.format(_type=type(filepath_or_buffer)))

    return filepath_or_buffer, None, compression
def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
                           compression=None):
    """
    Resolve a url into a readable stream; pass anything else through.

    Parameters
    ----------
    filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path),
                         or buffer
    encoding : forwarded to the stream reader for URLs and to the S3 helper;
               ``None`` is returned in its place for everything else
    compression : when equal to 'infer' and the input is a URL, replaced by
                  'gzip' if the response's Content-Encoding header says so,
                  otherwise by None

    Returns
    -------
    a filepath_or_buffer, the encoding, the compression
    """
    if _is_url(filepath_or_buffer):
        response = _urlopen(str(filepath_or_buffer))
        if compression == 'infer':
            header_value = response.headers.get('Content-Encoding', None)
            compression = 'gzip' if header_value == 'gzip' else None
        # append the compression to whatever the stream reader hands back
        stream_parts = maybe_read_encoded_stream(response, encoding,
                                                 compression)
        return tuple(stream_parts) + (compression,)

    if _is_s3_url(filepath_or_buffer):
        from pandas.io.s3 import (
            get_filepath_or_buffer as _s3_get_filepath_or_buffer)
        return _s3_get_filepath_or_buffer(filepath_or_buffer,
                                          encoding=encoding,
                                          compression=compression)

    # Anything else is a pathlib.Path/py.path.local or string
    as_string = _stringify_path(filepath_or_buffer)
    expanded = _expand_user(as_string)
    return expanded, None, compression
def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
                           compression=None):
    """
    Turn a url into a readable stream; hand back any other input as-is
    (after path stringification and user expansion).

    Parameters
    ----------
    filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path),
                         or buffer
    encoding : forwarded to the stream reader for URLs and to the S3 helper;
               ``None`` is returned in its place for everything else
    compression : for URLs, the value 'infer' is resolved to 'gzip' when the
                  Content-Encoding header equals 'gzip' and to None otherwise

    Returns
    -------
    a filepath_or_buffer, the encoding, the compression
    """
    url_like = _is_url(filepath_or_buffer)

    if url_like:
        remote = _urlopen(str(filepath_or_buffer))

        if compression == 'infer':
            declared = remote.headers.get('Content-Encoding', None)
            if declared != 'gzip':
                compression = None
            else:
                compression = 'gzip'

        # the reader yields its items; compression is tacked on at the end
        pieces = list(maybe_read_encoded_stream(remote, encoding,
                                                compression))
        pieces.append(compression)
        return tuple(pieces)

    if _is_s3_url(filepath_or_buffer):
        from pandas.io.s3 import (
            get_filepath_or_buffer as _s3_resolver)
        return _s3_resolver(filepath_or_buffer, encoding=encoding,
                            compression=compression)

    # Remaining inputs: pathlib.Path/py.path.local or string
    return (_expand_user(_stringify_path(filepath_or_buffer)),
            None,
            compression)
def get_filepath_or_buffer(filepath_or_buffer, encoding=None,
                           compression=None):
    """
    If the filepath_or_buffer is a url, translate and return the buffer.
    Otherwise passthrough.

    Parameters
    ----------
    filepath_or_buffer : a url, filepath (str, py.path.local or pathlib.Path),
                         or buffer
    encoding : str, optional
        Returned unchanged for plain URLs and forwarded to the S3 helper;
        for everything else ``None`` is returned instead.
    compression : str, optional
        Passed through to the caller; overridden with ``'gzip'`` when a URL
        response carries a ``Content-Encoding: gzip`` header.

    Returns
    -------
    a filepath_or_buffer, the encoding, the compression
    """
    if _is_url(filepath_or_buffer):
        url = str(filepath_or_buffer)
        req = _urlopen(url)
        try:
            content_encoding = req.headers.get('Content-Encoding', None)
            if content_encoding == 'gzip':
                # Override compression based on Content-Encoding header
                compression = 'gzip'
            reader = BytesIO(req.read())
        finally:
            # The body has been copied into memory (or reading failed), so
            # the response is no longer needed; close it to avoid leaking
            # the underlying connection.
            req.close()
        return reader, encoding, compression

    if _is_s3_url(filepath_or_buffer):
        from pandas.io import s3
        return s3.get_filepath_or_buffer(filepath_or_buffer,
                                         encoding=encoding,
                                         compression=compression)

    # It is a pathlib.Path/py.path.local or string
    filepath_or_buffer = _stringify_path(filepath_or_buffer)
    return _expand_user(filepath_or_buffer), None, compression