def pretty_print(
    self,
    fmt: Union[None, str, TableFormat] = None,
    *,
    to: Optional[PathLike] = None,
    mode: str = "w",
    **kwargs,
) -> str:
    """
    Outputs a pretty table using the `tabulate <https://pypi.org/project/tabulate/>`_ package.

    Args:
        fmt: A tabulate format; if None, chooses according to ``to``,
             falling back to ``"plain"``
        to: Write to this path (.gz, .zip, etc. is inferred)
        mode: Write mode: 'w', 'a', or 'x'
        kwargs: Passed to tabulate

    Returns:
        The formatted string
    """
    fmt = Utils.choose_table_format(path=to, fmt=fmt)
    s = self._tabulate(fmt, **kwargs)
    if to is not None:
        Utils.write(to, s, mode=mode)
    return s
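# Usage sketch for pretty_print (illustrative, not part of the library).
# "MyDf" is a hypothetical concrete subclass exposing this method.
def _example_pretty_print(df: "MyDf") -> None:
    # render in-memory with an explicit tabulate format
    print(df.pretty_print("github"))
    # infer the format (and compression) from the target path and write
    df.pretty_print(to="table.txt.gz")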
def sort_natural(
    self, column: str, *, alg: Union[None, int, Set[str]] = None, reverse: bool = False
) -> __qualname__:
    """
    Calls ``natsorted`` on a single column.

    Args:
        column: The name of the (single) column to sort by
        alg: Input as the ``alg`` argument to ``natsorted``.
             If ``None``, the "best" algorithm is chosen from the dtype of ``column``
             via :meth:`typeddfs.utils.Utils.guess_natsort_alg`.
             Otherwise, :meth:`typeddfs.utils.Utils.exact_natsort_alg`
             is called with ``Utils.exact_natsort_alg(alg)``.
        reverse: Reverse the sort order (e.g. 'z' before 'a')
    """
    df = self.vanilla_reset()
    if alg is None:
        _, alg = Utils.guess_natsort_alg(self[column].dtype)
    else:
        _, alg = Utils.exact_natsort_alg(alg)
    zzz = natsorted(df[column], alg=alg, reverse=reverse)
    # map each value to its position in the natural ordering, then sort by that
    df["__sort"] = df[column].map(lambda s: zzz.index(s))
    df.__class__ = self.__class__
    df = df.sort_values("__sort").drop("__sort", axis=1)
    return self.__class__._change(df)
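# Usage sketch for sort_natural (illustrative): natural order sorts "x2"
# before "x10", unlike plain lexicographic sorting. "MyDf" is hypothetical.
def _example_sort_natural(df: "MyDf") -> "MyDf":
    # e.g. a "sample" column containing ["x10", "x2", "x1"]
    # returns rows ordered x1, x2, x10 rather than x1, x10, x2
    return df.sort_natural("sample")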
def test_dots_and_dicts(self):
    dct = dict(abc=dict(xyz="123"), zzz=["456", "789"])
    dots = {"abc.xyz": "123", "zzz": ["456", "789"]}
    act_dots = Utils.dict_to_dots(dct)
    assert act_dots == dots
    act_dct = Utils.dots_to_dict(act_dots)
    assert act_dct == dct
def get_short_text(self, *, recommended_only: bool = False) -> str:
    """
    Returns a single-line text listing of allowed file formats.

    Args:
        recommended_only: Skip non-recommended file formats

    Returns:
        Something like::

            .csv, .tsv/.tab, or .flexwf [.gz/.xz/.zip/.bz2]; .feather, .pickle, or .snappy ...
    """
    fmts = [f for f in self if not recommended_only or f.fmt.is_recommended]
    text_fmts = Utils.natsort(
        ["/".join(f.bare_suffixes) for f in fmts if f.fmt.is_text], dtype=str
    )
    bin_fmts = Utils.natsort(
        ["/".join(f.bare_suffixes) for f in fmts if f.fmt.is_binary], dtype=str
    )
    txt = ""
    if len(text_fmts) > 0:
        txt += (
            Utils.join_to_str(*text_fmts, last="or")
            + " ["
            + "/".join([s.suffix for s in CompressionFormat.list_non_empty()])
            + "]"
        )
    if len(bin_fmts) > 0:
        txt += ("; " if len(text_fmts) > 0 else "") + Utils.join_to_str(*bin_fmts, last="or")
    return txt
def bare_suffixes(self) -> Sequence[str]:
    """
    Returns all suffixes, excluding compressed variants (e.g. ``.gz``), naturally sorted.
    """
    suffixes = {CompressionFormat.strip_suffix(s).name for s in self.fmt.suffixes}
    return Utils.natsort(suffixes, str)
def strip_control_chars(self) -> __qualname__:
    """
    Removes all control characters (Unicode group 'C') from all string-typed columns.
    """
    df = self.vanilla_reset()
    for c in df.columns:
        if Utils.is_string_dtype(df[c]):
            df[c] = df[c].map(Utils.strip_control_chars)
    return self.__class__._convert_typed(df)
def test_list(self):
    x: FrozeList = Utils.freeze([1, 2, 3])
    assert isinstance(x, FrozeList)
    assert x.to_list() == [1, 2, 3]
    assert str(x) == str(x.to_list())
    assert repr(x) == repr(x.to_list())
    y: FrozeList = Utils.freeze([1, 2, 1])
    assert x == x and y == y
    assert not x < x and not y < y
    assert x > y
    assert hash(x) == hash(x)
    assert hash(x) != hash(y)
    assert x.get(1) == 1
    assert x.get(5) is None
    assert x.get(5, 100) == 100
    assert x.req(1) == 1
    assert x.req(5, 100) == 100
    with pytest.raises(KeyError):
        x.req(5)
def _build(self) -> Type[BaseDf]:
    if self._secure and self._hash_alg in Utils.insecure_hash_functions():
        raise DfTypeConstructionError(f"Hash algorithm {self._hash_alg} forbidden by .secure()")
    self._check_final()
    _io_typing = IoTyping[BaseDf](
        _remap_suffixes=dict(self._remapped_suffixes),
        _text_encoding=self._encoding,
        _read_kwargs=dict(self._read_kwargs),
        _write_kwargs=dict(self._write_kwargs),
        _hash_alg=self._hash_alg,
        _save_hash_file=self._hash_file,
        _save_hash_dir=self._hash_dir,
        _secure=self._secure,
        _recommended=self._recommended,
        _attrs_suffix=_DEFAULT_ATTRS_SUFFIX if self._attr_suffix is None else self._attr_suffix,
        _use_attrs=self._attr_suffix is not None,
        _attrs_json_kwargs=self._attr_json_kwargs,
        _custom_readers={k: v[0] for k, v in self._custom_formats.items()},
        _custom_writers={k: v[1] for k, v in self._custom_formats.items()},
    )
    _typing = DfTyping(
        _io_typing=_io_typing,
        _auto_dtypes=dict(self._dtypes),
        _post_processing=self._post_processing,
        _verifications=self._verifications,
        _more_index_names_allowed=not self._strict_meta,
        _more_columns_allowed=not self._strict_cols,
        _required_columns=list(self._req_cols),
        _required_index_names=list(self._req_meta),
        _reserved_columns=list(self._res_cols),
        _reserved_index_names=list(self._res_meta),
        _columns_to_drop=set(self._drop),
        _index_series_name=self._index_series_name,
        _column_series_name=self._column_series_name,
        _value_dtype=self._value_dtype,
    )

    class New(self._clazz, *self._classes):
        @classmethod
        def get_typing(cls) -> DfTyping:
            return _typing

    New.__name__ = self._name
    New.__doc__ = self._doc
    for k, v in self._methods.items():
        setattr(New, k, v)
    for k, v in self._classmethods.items():
        setattr(New, k, classmethod(v))
    return New
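# Usage sketch for the builder that ends in _build (hedged: this assumes the
# public TypedDfs.typed(...) entry point of this library; the class and column
# names below are examples).
from typeddfs import TypedDfs

KeyValue = (
    TypedDfs.typed("KeyValue")   # becomes the generated class's __name__
    .require("key", "value")     # required columns, recorded in _req_cols
    .strict()                    # disallow extra columns (_more_columns_allowed=False)
    .build()                     # _build() assembles DfTyping and returns the New subclass
)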
def _cols(self, which: Sequence[str], *, short: bool) -> Sequence[str]:
    lst = []
    for c in which:
        t = self.typing.auto_dtypes.get(c)
        if t is not None:
            t = Utils.describe_dtype(t, short=short)
        if t is None:
            lst.append(c)
        else:
            lst.append(f"{c} ({t})")
    return lst
def get_short_typing_text(self) -> str:
    """
    Returns a short text description of the required format for a matrix.
    """
    t = self.typing
    if t.value_dtype is None:
        s = "Matrix. "
    else:
        s = Utils.describe_dtype(t.value_dtype).capitalize()
        s += f" ({t.value_dtype.__name__}) matrix. "
    s += "List row names in the index or a special column 'row'."
    return s
def test_set(self):
    x: FrozeSet = Utils.freeze({1, 2, 3})
    assert isinstance(x, FrozeSet)
    assert x.to_set() == {1, 2, 3}
    assert str(x) == str(x.to_set())
    assert repr(x) == repr(x.to_set())
    assert x.to_frozenset() == frozenset({1, 2, 3})
    y: FrozeSet = Utils.freeze({1, 2, 1})
    assert x == x and y == y
    assert not x < x and not y < y
    assert x > y
    assert hash(x) == hash(x)
    assert hash(x) != hash(y)
    assert not x.isdisjoint(y)
    assert x.get(1) == 1
    assert x.get(5) is None
    assert x.get(5, 100) == 100
    assert x.req(1) == 1
    assert x.req(5, 100) == 100
    with pytest.raises(KeyError):
        x.req(5)
def to_rst(
    self,
    path_or_none: Optional[PathLike] = None,
    style: str = "simple",
    mode: str = "w",
) -> Optional[str]:
    """
    Writes a reStructuredText table.

    Args:
        path_or_none: Either a file path or ``None`` to return the string
        style: The type of table; currently only "simple" is supported
        mode: Write mode
    """
    txt = self._tabulate(fmt="rst") + "\n"
    return Utils.write(path_or_none, txt, mode=mode)
def _read_properties_like(
    cls,
    unescape_keys,
    unescape_values,
    comment_chars: Set[str],
    strip_quotes: bool,
    path_or_buff,
    **kwargs,
) -> __qualname__:
    r"""
    Reads a .properties-like file.
    """
    cls._assert_can_write_properties_class()
    if len(cls.get_typing().required_names) == 2:
        key_col, val_col = cls.get_typing().required_names
    else:
        key_col, val_col = "key", "value"
    txt = Utils.read(path_or_buff, **kwargs)
    keys = []
    values = []
    section = ""
    for i, line in enumerate(txt.splitlines()):
        try:
            line = line.strip()
            if any(line.startswith(c) for c in comment_chars) or len(line) == 0:
                continue
            if line.startswith("["):
                # treat [ ] (with spaces) as the global key
                section = line.lstrip("[").rstrip("]").strip()
                continue
            # split on the first "=" only; values may themselves contain "="
            key, value = line.split("=", 1)
            key, value = key.strip(), value.strip()
            if unescape_keys is not None:
                key = unescape_keys(key)
            if value.endswith("\\"):
                raise ValueError("Ends with \\; continued lines are not yet supported")
            if unescape_values is not None:
                value = unescape_values(value)
            if strip_quotes:
                value = value.strip('"')
            if section != "":
                key = section + "." + key
            keys.append(key)
            values.append(value)
        except ValueError:
            raise ValueError(f"Malformed line {i}: '{line}'")
    df = pd.DataFrame({key_col: keys, val_col: values})
    return cls.convert(df)
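# Input sketch for _read_properties_like (illustrative): comments, blank
# lines, and [sections] are accepted; a section becomes a dotted key prefix.
_EXAMPLE_PROPERTIES = """
# database settings
[db]
host = localhost
url = jdbc:pg://x?a=1
"""
# Parsing the above yields rows ("db.host", "localhost") and
# ("db.url", "jdbc:pg://x?a=1") -- the value keeps its own "=" because
# each line is split on the first "=" only.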
def sort_natural_index(
    self, *, alg: Union[None, int, Set[str]] = None, reverse: bool = False
) -> __qualname__:
    """
    Calls ``natsorted`` on this index. Works for multi-index too.

    Args:
        alg: Input as the ``alg`` argument to ``natsorted``.
             If ``None``, the "best" algorithm is chosen from the dtype of the index
             via :meth:`typeddfs.utils.Utils.guess_natsort_alg`.
             Otherwise, :meth:`typeddfs.utils.Utils.exact_natsort_alg`
             is called with ``Utils.exact_natsort_alg(alg)``.
        reverse: Reverse the sort order (e.g. 'z' before 'a')
    """
    df = self.copy()
    if alg is None:
        # TODO: Does this work for multi-index?
        _, alg = Utils.guess_natsort_alg(self.index.dtype)
    else:
        _, alg = Utils.exact_natsort_alg(alg)
    zzz = natsorted(df.index, alg=alg, reverse=reverse)
    df["__sort"] = df.index.map(lambda s: zzz.index(s))
    df.__class__ = self.__class__
    df = df.sort_values("__sort").drop_cols(["__sort"])
    return self.__class__._change(df)
def _get_write_kwargs(cls, fmt: Optional[FileFormat], path: Path) -> Mapping[str, Any]:
    t = cls.get_typing().io
    real_suffix = CompressionFormat.strip_suffix(path).suffix
    kwargs = t.write_kwargs.get(fmt, {})
    kwargs.update(t.write_suffix_kwargs.get(real_suffix, {}))
    if fmt is FileFormat.json:
        # not perfect, but much better than the alternative of failing
        # I don't see a better solution anyway
        kwargs["force_ascii"] = False
    elif fmt is not None and fmt.supports_encoding:
        # and IS NOT JSON -- it doesn't use "encoding="
        encoding = kwargs.get("encoding", t.text_encoding)
        kwargs["encoding"] = Utils.get_encoding(encoding)
    return kwargs
def test_dict(self):
    x: FrozeDict = Utils.freeze({1: "cat", 2: "dog"})
    assert isinstance(x, FrozeDict)
    assert str(x) == str(x.to_dict())
    assert repr(x) == repr(x.to_dict())
    y: FrozeDict = Utils.freeze({1: "cat", 2: "zebra"})
    z: FrozeDict = Utils.freeze({2: "cat", 3: "aardvark"})
    assert x == x and y == y and z == z
    assert x != y and x != z and y != z
    assert x < z
    assert x < y
    assert y < z
    assert not x < x
    assert not y < y
    assert not z < z
    assert hash(x) == hash(x) and hash(y) == hash(y) and hash(z) == hash(z)
    assert hash(x) != hash(y)
    assert x.get(1) == "cat"
    assert x.get(5) is None
    assert x.get(5, "elephant") == "elephant"
    assert x.req(1) == "cat"
    assert x.req(5, "elephant") == "elephant"
    with pytest.raises(KeyError):
        x.req(5)
def _to_properties_like(
    self,
    escape_keys,
    escape_values,
    sep: str,
    comment_char: str,
    path_or_buff=None,
    mode: str = "w",
    comment: Union[None, str, Sequence[str]] = None,
    **kwargs,
) -> Optional[str]:
    r"""
    Writes a .properties-like file.
    """
    comment = [] if comment is None else ([comment] if isinstance(comment, str) else comment)
    self.__class__._assert_can_write_properties_class()
    self._assert_can_write_properties_instance()
    df = self.vanilla_reset()
    if len(self.__class__.get_typing().required_names) == 2:
        key_col, val_col = self.__class__.get_typing().required_names
    else:
        key_col, val_col = "key", "value"
    df.columns = [key_col, val_col]
    df = df.sort_values(key_col)  # essential
    # prefix each comment line with the comment character
    lines = [comment_char + " " + c.lstrip(comment_char).lstrip() for c in comment]
    section = ""
    for k, v in zip(df[key_col], df[val_col]):
        if "." in k:
            s, k = str(k).split(".", 1)
            s, k = s.strip(), k.strip()
            if s != section:
                lines.append(f"[{s}]")
                section = s  # remember the current section so each header is emitted once
        if escape_keys:
            k = escape_keys(k)
        if escape_values:
            v = escape_values(v)
        lines.append(k + " " + sep + " " + v.strip('"'))
    return Utils.write(path_or_buff, os.linesep.join(lines), mode=mode, **kwargs)
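# Output sketch for _to_properties_like (illustrative): keys are sorted, and a
# dotted prefix is emitted once as a [section] header. With sep="=" and
# comment_char="#", rows ("db.host", "x") and ("db.port", "5432") serialize
# roughly as:
#
#     [db]
#     host = x
#     port = 5432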
def _get_read_kwargs(cls, fmt: Optional[FileFormat], path: Path) -> Mapping[str, Any]:
    t = cls.get_typing().io
    real_suffix = CompressionFormat.strip_suffix(path).suffix
    kwargs = t.read_kwargs.get(fmt, {})
    kwargs.update(t.read_suffix_kwargs.get(real_suffix, {}))
    if fmt in [
        FileFormat.csv,
        FileFormat.tsv,
        FileFormat.properties,
        FileFormat.lines,
        FileFormat.flexwf,
        FileFormat.fwf,
        FileFormat.json,
    ]:
        encoding = kwargs.get("encoding", t.text_encoding)
        kwargs["encoding"] = Utils.get_encoding(encoding)
    return kwargs
def get_short_typing_text(self) -> str:
    """
    Returns a condensed text description of the required and optional columns.
    """
    t = self.typing
    req = self.get_required_cols(short=True)
    res = self.get_reserved_cols(short=True)
    s = ""
    if len(req) > 0:
        s += f"Requires columns {Utils.join_to_str(*req, last='and')}."
    if len(res) > 0:
        s += (
            (" " if len(s) > 0 else "")
            + "Columns "
            + Utils.join_to_str(*res, last="and")
            + " are optional."
        )
    s += " "
    # strict typing forbids extra columns (see _more_columns_allowed=not strict_cols)
    if t.is_strict:
        s += "No extra columns are allowed."
    else:
        s += "More columns are ok."
    return s
def get_long_text(
    self,
    *,
    recommended_only: bool = False,
    nl: str = "\n",
    bullet: str = "- ",
    indent: str = "  ",
) -> str:
    r"""
    Returns a multi-line text listing of allowed file formats.

    Args:
        recommended_only: Skip non-recommended file formats
        nl: Newline characters; use "\n", "\\n", or " "
        bullet: Prepended to each item
        indent: Spaces for nested indent

    Returns:
        Something like::

            [[ Supported formats ]]:
              - .csv[.bz2/.gz/.xz/.zip]: comma-delimited
              - .parquet/.snappy: Parquet
              - .h5/.hdf/.hdf5: HDF5 (key 'df') [discouraged]
              - .pickle/.pkl: Python Pickle [discouraged]
    """
    bullet = nl + indent + bullet
    fmts = [f for f in self if not recommended_only or f.fmt.is_recommended]
    formats = [f.get_text() + ("" if f.fmt.is_recommended else " [avoid]") for f in fmts]
    formats = Utils.natsort(formats, str)
    txt = bullet + bullet.join(formats)
    return f"[[ Supported formats ]]: {txt}"
def read_file(
    cls,
    path: Union[Path, str],
    *,
    file_hash: Optional[bool] = None,
    dir_hash: Optional[bool] = None,
    hex_hash: Optional[str] = None,
    attrs: Optional[bool] = None,
) -> __qualname__:
    """
    Reads from a file (or possibly URL), guessing the format from the filename extension.
    Delegates to the ``read_*`` functions of this class.

    You can always write and then read back to get the same dataframe.

    .. code-block:: python

        # df is any DataFrame from typeddfs
        # path can use any suffix
        df.write_file(path)
        df.read_file(path)

    Text files always allow compression with .gz, .zip, .bz2, or .xz.

    Supports:
        - .csv, .tsv, or .tab
        - .json
        - .xml
        - .feather
        - .parquet or .snappy
        - .h5 or .hdf
        - .xlsx, .xls, .odf, etc.
        - .toml
        - .properties
        - .ini
        - .fwf (fixed-width)
        - .flexwf (fixed-but-unspecified-width with an optional delimiter)
        - .txt, .lines, or .list

    See Also:
        :meth:`read_url`
        :meth:`write_file`

    Args:
        path: Only path-like strings or pathlib objects are supported, not buffers
              (because we need a filename).
        file_hash: Check against a hash file specific to this file (e.g. <path>.sha1)
        dir_hash: Check against a per-directory hash file
        hex_hash: Check against this hex-encoded hash
        attrs: Set dataset attributes/metadata (``pd.DataFrame.attrs``) from a JSON file.
               If True, uses :attr:`typeddfs.df_typing.DfTyping.attrs_suffix`.
               If None, chooses according to ``cls.get_typing().io.use_attrs``.
               If False, does not set.

    Returns:
        An instance of this class
    """
    if any(str(path).startswith(x + "://") for x in ["http", "https", "ftp"]):
        # just save some pain -- better than a weird error in .resolve()
        raise ValueError(f"Cannot read from URL {path}; use read_url instead")
    path = Path(path).resolve()
    t: DfTyping = cls.get_typing()
    if attrs is None:
        attrs = t.io.use_attrs
    cs = Checksums(alg=t.io.hash_algorithm)
    cs.verify_any(path, file_hash=file_hash, dir_hash=dir_hash, computed=hex_hash)
    df = cls._call_read(cls, path)
    if attrs:
        attrs_path = path.parent / (path.name + t.io.attrs_suffix)
        json_data = Utils.json_decoder().from_str(attrs_path.read_text(encoding="utf-8"))
        df.attrs.update(json_data)
    return cls._convert_typed(df)
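# Usage sketch for read_file (illustrative; "MyDf" is a hypothetical subclass).
def _example_read_with_hash(path: str) -> "MyDf":
    # verifies path against its per-file hash file, e.g. "<path>.sha256"
    # if the typing's configured hash algorithm is sha256
    return MyDf.read_file(path, file_hash=True)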
def unhashable_list(self):
    x: FrozeList = Utils.freeze([[1]])
    y: FrozeList = Utils.freeze([[1]])
    assert hash(x) == 1
    assert {x} != {y}
def test_encoding(self):
    assert Utils.get_encoding("platform") == sys.getdefaultencoding()
    assert "bom" not in Utils.get_encoding("utf8(bom)")
    assert "bom" not in Utils.get_encoding("utf16(bom)")
    assert Utils.get_encoding("UTF-8") == "utf8"
    assert Utils.get_encoding("utf-16") == "utf16"
def test_strip_control_chars(self):
    assert Utils.strip_control_chars("ab\ncd") == "abcd"
    assert Utils.strip_control_chars("ab\0\0cℶd") == "abcℶd"
    assert Utils.strip_control_chars("ℶℶ\u202Cℶℶ") == "ℶℶℶℶ"
    assert Utils.strip_control_chars("\u202C") == ""
def test_basic(self):
    assert "sha1" in Utils.insecure_hash_functions()
    assert "__xml_index_" in Utils.banned_names()
def test_table_formats(self):
    formats = list(Utils.table_formats())
    assert len(formats) > 10
    assert "simple" in formats
    x = Utils.table_format("simple")
    assert isinstance(x, TableFormat)
def write_file(
    self,
    path: Union[Path, str],
    *,
    overwrite: bool = True,
    mkdirs: bool = False,
    file_hash: Optional[bool] = None,
    dir_hash: Optional[bool] = None,
    attrs: Optional[bool] = None,
) -> Optional[str]:
    """
    Writes to a file, guessing the format from the filename extension.
    Delegates to the ``to_*`` functions of this class (e.g. ``to_csv``).
    Only includes file formats that can be read back in
    with corresponding ``to`` methods.

    Supports, where text formats permit optional .gz, .zip, .bz2, or .xz:
        - .csv, .tsv, or .tab
        - .json
        - .feather
        - .fwf (fixed-width)
        - .flexwf (columns aligned but using a delimiter)
        - .parquet or .snappy
        - .h5, .hdf, or .hdf5
        - .xlsx, .xls, and other variants for Excel
        - .odt and .ods (OpenOffice)
        - .xml
        - .toml
        - .ini
        - .properties
        - .pkl and .pickle
        - .txt, .lines, or .list; see :meth:`to_lines` and :meth:`read_lines`

    See Also:
        :meth:`read_file`

    Args:
        path: Only path-like strings or pathlib objects are supported, not buffers
              (because we need a filename).
        overwrite: If False, complain if the file already exists
        mkdirs: Make the directory and parents if they do not exist
        file_hash: Write a hash for this file.
                   The filename will be path+"."+algorithm.
                   If None, chooses according to ``self.get_typing().io.hash_file``.
        dir_hash: Append a hash for this file into a list.
                  The filename will be the directory name suffixed by the algorithm
                  (i.e. path.parent/(path.parent.name+"."+algorithm) ).
                  If None, chooses according to ``self.get_typing().io.hash_dir``.
        attrs: Write dataset attributes/metadata (``pd.DataFrame.attrs``) to a JSON file.
               Uses :attr:`typeddfs.df_typing.DfTyping.attrs_suffix`.
               If None, chooses according to ``self.get_typing().io.use_attrs``.

    Returns:
        Whatever the corresponding method on ``pd.to_*`` returns.
        This is usually either str or None

    Raises:
        InvalidDfError: If the DataFrame is not valid for this type
        ValueError: If the type of a column or index name is non-str
    """
    if any(str(path).startswith(x + "://") for x in ["http", "https", "ftp"]):
        # just save some pain -- better than a weird error in .resolve()
        raise ValueError(f"Cannot write to URL {path}")
    path = Path(path).resolve()
    t = self.__class__.get_typing()
    file_hash = file_hash is True or (file_hash is None and t.io.file_hash)
    dir_hash = dir_hash is True or (dir_hash is None and t.io.dir_hash)
    attrs = attrs is True or (attrs is None and t.io.use_attrs)
    attrs_path = path.parent / (path.name + t.io.attrs_suffix)
    attrs_data = Utils.json_encoder().as_str(self.attrs)
    cs = Checksums(alg=t.io.hash_algorithm)
    file_hash_path = cs.get_filesum_of_file(path)
    dir_hash_path = cs.get_dirsum_of_file(path)
    # check for overwrite errors now to preserve atomicity
    if not overwrite:
        if path.exists():
            raise FileExistsError(f"File {path} already exists")
        if file_hash and file_hash_path.exists():
            raise HashFileExistsError(f"{file_hash_path} already exists")
        if dir_hash_path.exists():
            dir_sums = Checksums(alg=t.io.hash_algorithm).load_dirsum_exact(dir_hash_path)
            if path in dir_sums:
                raise HashEntryExistsError(f"Path {path} listed in {dir_hash_path}")
        if attrs and attrs_path.exists():
            raise FileExistsError(f"{attrs_path} already exists")
    self._check(self)
    types = set(self.column_names()).union(self.index_names())
    if any(not isinstance(c, str) for c in types):
        raise NonStrColumnError(f"Columns must be of str type to serialize, not {types}")
    # now we're ready to write
    if mkdirs:
        path.parent.mkdir(exist_ok=True, parents=True)
    # to get a FileNotFoundError instead of a WritePermissionsError:
    if not mkdirs and not path.parent.exists():
        raise FileNotFoundError(f"Directory {path.parent} not found")
    # check for lack of write-ability to any of the files
    # we had to do this after creating the dirs unfortunately
    _all_files = [(attrs, attrs_path), (file_hash, file_hash_path), (dir_hash, dir_hash_path)]
    all_files = [f for a, f in _all_files if a]
    all_dirs = [f.parent for (a, f) in _all_files]
    # we need to check both the dirs and the files
    Utils.verify_can_write_dirs(*all_dirs, missing_ok=False)
    Utils.verify_can_write_files(*all_files, missing_ok=True)
    # we verified as much as we can -- finally we can write!!
    # this writes the main file:
    z = self._call_write(path)
    # write the hashes
    # this shouldn't fail
    cs = Checksums(alg=t.io.hash_algorithm)
    cs.write_any(
        path,
        to_file=file_hash,
        to_dir=dir_hash,
        overwrite=overwrite,
    )
    # write dataset attributes
    # this also shouldn't fail
    if attrs:
        attrs_path.write_text(attrs_data, encoding="utf8")
    return z
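# Usage sketch for write_file (illustrative; "MyDf" is hypothetical):
def _example_write(df: "MyDf") -> None:
    # refuse to clobber, create parent dirs, and record a per-file hash
    df.write_file("out/data.feather", overwrite=False, mkdirs=True, file_hash=True)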
def all_suffixes(self) -> Sequence[str]:
    """
    Returns all suffixes, naturally sorted.
    """
    return Utils.natsort(self.fmt.suffixes, str)
def is_text_encoding_utf(self) -> bool:
    # Utils.get_encoding appears to normalize names to dashless forms
    # (e.g. "UTF-8" -> "utf8" per the tests), so accept both spellings
    return Utils.get_encoding(self._text_encoding) in {
        "utf-8", "utf-16", "utf-32", "utf8", "utf16", "utf32",
    }
def to_fwf(
    self,
    path_or_buff=None,
    mode: str = "w",
    colspecs: Optional[Sequence[Tuple[int, int]]] = None,
    widths: Optional[Sequence[int]] = None,
    na_rep: Optional[str] = None,
    float_format: Optional[str] = None,
    date_format: Optional[str] = None,
    decimal: str = ".",
    **kwargs,
) -> Optional[str]:
    """
    Writes a fixed-width text format.
    See ``read_fwf`` and ``to_flexwf`` for more info.

    .. warning::

        This method is a preview. Not all options are complete, and behavior is
        subject to change in a future (major) version. Notably, Pandas may
        eventually introduce a method with the same name.

    Args:
        path_or_buff: Path or buffer
        mode: write or append (w/a)
        colspecs: A list of tuples giving the extents of the fixed-width fields of each line
                  as half-open intervals (i.e., [from, to) )
        widths: A list of field widths which can be used instead of ``colspecs``
                if the intervals are contiguous
        na_rep: Missing data representation
        float_format: Format string for floating point numbers
        date_format: Format string for datetime objects
        decimal: Character recognized as decimal separator. E.g. use ',' for European data.
        kwargs: Passed to :meth:`typeddfs.utils.Utils.write`

    Returns:
        The string data if ``path_or_buff`` is a buffer; None if it is a file
    """
    if colspecs is not None and widths is not None:
        raise ValueError("Both widths and colspecs passed")
    if widths is not None:
        colspecs = []
        at = 0
        for w in widths:
            colspecs.append((at, at + w))
            at += w
    # if colspecs is None:
    if True:  # TODO: use format, etc.
        content = self._tabulate(Utils.plain_table_format(sep=" "), disable_numparse=True)
    else:
        df = self.vanilla_reset()
        if len(df.columns) != len(colspecs):
            raise ValueError(f"{colspecs} column intervals for {len(df.columns)} columns")
        for col, (start, end) in zip(df.columns, colspecs):
            width = end - start
            mx = df[col].map(str).map(len).max()
            if mx > width:
                raise ValueError(f"Column {col} has max length {mx} > {width}")
        _number_format = {
            "na_rep": na_rep,
            "float_format": float_format,
            "date_format": date_format,
            "quoting": csv.QUOTE_NONE,
            "decimal": decimal,
        }
        res = df._mgr.to_native_types(**_number_format)
        data: Sequence[Sequence[Any]] = [res.iget_values(i) for i in range(len(res.items))]
        content = None  # TODO
    if path_or_buff is None:
        return content
    _encoding = dict(encoding=kwargs.get("encoding")) if "encoding" in kwargs else {}
    _compression = dict(compression=kwargs.get("compression")) if "compression" in kwargs else {}
    return Utils.write(path_or_buff, content, mode=mode, **_encoding, **_compression)
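# Usage sketch for to_fwf (illustrative; "MyDf" is hypothetical). Note the
# preview caveat above: every call currently takes the _tabulate path, so
# colspecs/widths are validated and converted but not yet used for layout.
def _example_to_fwf(df: "MyDf") -> None:
    df.to_fwf("table.fwf", widths=[8, 12, 6])  # widths are converted to colspecs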