def infer_compression(
    filepath_or_buffer: FilePathOrBuffer, compression: str | None
) -> str | None:
    """
    Resolve the compression method to use for ``filepath_or_buffer``.

    ``None`` is passed through as "no compression". ``'infer'`` triggers a
    lookup based on the extension of the path. Any other value is returned
    unchanged provided it is a known method; otherwise an error is raised.

    Parameters
    ----------
    filepath_or_buffer : str or file handle
        File path or object.
    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}
        If 'infer' and `filepath_or_buffer` is path-like, then detect
        compression from the following extensions: '.gz', '.bz2', '.zip',
        or '.xz' (otherwise no compression).

    Returns
    -------
    string or None
        The resolved compression method, or None when no compression applies.

    Raises
    ------
    ValueError
        If `compression` is neither None, 'infer', nor a key of
        ``_compression_to_extension``.
    """
    if compression is None:
        return None

    if compression == "infer":
        # Path-like objects (e.g. pathlib.Path) are turned into plain strings.
        path = stringify_path(filepath_or_buffer, convert_file_like=True)
        if not isinstance(path, str):
            # A buffer carries no file name to inspect: assume uncompressed.
            return None

        # The match is case-insensitive, so lower the path once up front.
        lowered = path.lower()
        return next(
            (
                method
                for method, suffix in _compression_to_extension.items()
                if lowered.endswith(suffix)
            ),
            None,
        )

    # An explicit method was requested; accept it only if it is known.
    if compression in _compression_to_extension:
        return compression

    valid = ["infer", None, *sorted(_compression_to_extension)]
    raise ValueError(
        f"Unrecognized compression type: {compression}\n"
        f"Valid compression types are {valid}"
    )
def infer_compression(
    filepath_or_buffer: FilePathOrBuffer, compression: Optional[str]
) -> Optional[str]:
    """
    Determine which compression method applies to ``filepath_or_buffer``.

    With ``compression='infer'`` the method is derived from the file
    extension. Any other value is handed back as-is after being validated
    against the known methods; an unknown value raises.

    Parameters
    ----------
    filepath_or_buffer : str or file handle
        File path or object.
    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}
        If 'infer' and `filepath_or_buffer` is path-like, then detect
        compression from the following extensions: '.gz', '.bz2', '.zip',
        or '.xz' (otherwise no compression).

    Returns
    -------
    string or None
        The compression method to use, or None for no compression.

    Raises
    ------
    ValueError
        On invalid compression specified.
    """
    # Nothing was requested, so nothing is applied.
    if compression is None:
        return None

    if compression == "infer":
        # Normalise path types (e.g. pathlib.Path) to str before inspecting.
        target = stringify_path(filepath_or_buffer)
        if isinstance(target, str):
            # Compare extensions case-insensitively against the file name/URL.
            lowered = target.lower()
            for method, suffix in _compression_to_extension.items():
                if lowered.endswith(suffix):
                    return method
        # Either a buffer (no name to inspect) or no known extension matched.
        return None

    # Explicit method: reject anything that is not a registered key.
    if compression not in _compression_to_extension:
        message = f"Unrecognized compression type: {compression}"
        valid = ["infer", None] + sorted(_compression_to_extension)
        raise ValueError(message + f"\nValid compression types are {valid}")

    return compression
def read_sas(
    filepath_or_buffer: FilePathOrBuffer,
    format: Optional[str] = None,
    index: Optional[Label] = None,
    encoding: Optional[str] = None,
    chunksize: Optional[int] = None,
    iterator: bool = False,
) -> Union["DataFrame", ReaderBase]:
    """
    Read SAS files stored as either XPORT or SAS7BDAT format files.

    Parameters
    ----------
    filepath_or_buffer : str, path object or file-like object
        Any valid string path is acceptable. The string could be a URL. Valid
        URL schemes include http, ftp, s3, and file. For file URLs, a host is
        expected. A local file could be:
        ``file://localhost/path/to/table.sas``.

        If you want to pass in a path object, pandas accepts any
        ``os.PathLike``.

        By file-like object, we refer to objects with a ``read()`` method,
        such as a file handle (e.g. via builtin ``open`` function)
        or ``StringIO``.
    format : str {'xport', 'sas7bdat'} or None
        If None, file format is inferred from file extension. If 'xport' or
        'sas7bdat', uses the corresponding format. The comparison is
        case-insensitive.
    index : identifier of index column, defaults to None
        Identifier of column that should be used as index of the DataFrame.
    encoding : str, default is None
        Encoding for text data.  If None, text data are stored as raw bytes.
    chunksize : int
        Read file `chunksize` lines at a time, returns iterator.
    iterator : bool, defaults to False
        If True, returns an iterator for reading the file incrementally.

    Returns
    -------
    DataFrame if iterator=False and chunksize=None, else SAS7BDATReader
    or XportReader

    Raises
    ------
    ValueError
        If `format` is None and `filepath_or_buffer` is not a string path,
        if the format cannot be inferred from the file extension, or if
        `format` is neither 'xport' nor 'sas7bdat'.
    """
    if format is None:
        buffer_error_msg = (
            "If this is a buffer object rather "
            "than a string name, you must specify a format string"
        )
        filepath_or_buffer = stringify_path(filepath_or_buffer)
        if not isinstance(filepath_or_buffer, str):
            raise ValueError(buffer_error_msg)
        fname = filepath_or_buffer.lower()
        if fname.endswith(".xpt"):
            format = "xport"
        elif fname.endswith(".sas7bdat"):
            format = "sas7bdat"
        else:
            raise ValueError("unable to infer format of SAS file")

    # Normalise once so both branches compare against the same value.
    fmt = format.lower()

    reader: ReaderBase
    if fmt == "xport":
        # Imported lazily so the reader module is only loaded when needed.
        from pandas.io.sas.sas_xport import XportReader

        reader = XportReader(
            filepath_or_buffer,
            index=index,
            encoding=encoding,
            chunksize=chunksize,
        )
    elif fmt == "sas7bdat":
        from pandas.io.sas.sas7bdat import SAS7BDATReader

        reader = SAS7BDATReader(
            filepath_or_buffer,
            index=index,
            encoding=encoding,
            chunksize=chunksize,
        )
    else:
        raise ValueError("unknown SAS format")

    if iterator or chunksize:
        # Incremental reading: the caller keeps the reader and must close it.
        return reader

    # One-shot read: the reader never leaves this function, so release its
    # underlying file handle here, even when read() raises.
    # NOTE(review): relies on ReaderBase exposing close() - confirm.
    try:
        return reader.read()
    finally:
        reader.close()