def test_iterator(self):
    """Write several objects to one msgpack file and compare them item by item.

    The objects are packed together with a single ``to_msgpack`` call, then
    the file is read back with ``iterator=True`` and every item produced is
    checked against the object that was written at the same position.
    """
    self.setUp()
    expected_items = [
        self.frame["float"],
        self.frame["float"].A,
        self.frame["float"].B,
        None,
    ]
    with ensure_clean(self.path) as tmp_path:
        to_msgpack(tmp_path, *expected_items)
        unpacked_stream = read_msgpack(tmp_path, iterator=True)
        for position, unpacked in enumerate(unpacked_stream):
            check_arbitrary(unpacked, expected_items[position])
def test_string_io(self):
    """Round-trip a random frame through in-memory and on-disk msgpack payloads.

    Covers five ways of obtaining / feeding the serialized payload:

    1. ``df.to_msgpack(None)`` passed straight to ``read_msgpack``
    2. ``df.to_msgpack()`` passed straight to ``read_msgpack``
    3. ``df.to_msgpack()`` wrapped in ``BytesIO``
    4. module-level ``to_msgpack(None, df)``
    5. the payload written to a temporary file and read back by path

    Each result must compare equal to the original frame.
    """
    df = DataFrame(np.random.randn(10, 2))

    # Explicit ``None`` target: the return value is fed to the reader.
    s = df.to_msgpack(None)
    result = read_msgpack(s)
    tm.assert_frame_equal(result, df)

    # No target argument at all.
    s = df.to_msgpack()
    result = read_msgpack(s)
    tm.assert_frame_equal(result, df)

    # Same payload, but handed over as a file-like object.
    s = df.to_msgpack()
    result = read_msgpack(BytesIO(s))
    tm.assert_frame_equal(result, df)

    # Module-level writer with an explicit ``None`` target.
    s = to_msgpack(None, df)
    result = read_msgpack(s)
    tm.assert_frame_equal(result, df)

    with ensure_clean(self.path) as p:
        s = df.to_msgpack()
        # Use a context manager so the handle is closed even if write() raises.
        with open(p, "wb") as fh:
            fh.write(s)
        result = read_msgpack(p)
        tm.assert_frame_equal(result, df)
def pd_write(df, fn, index_column=None, **kwargs):
    """Write *df* to *fn* using the writer selected by the file extension.

    NOTE: index is dropped when writing pandas dataframe (``index=False`` is
    always forced into the writer keyword arguments).

    The data is first written to ``fn + ".tmp"`` and then moved over *fn*, so
    an existing *fn* is only replaced after the writer has finished.

    Args:
        df: The frame to write.
        fn: Destination path (``str`` or ``os.PathLike``). The extension picks
            the writer: ``.msgpack`` uses ``mbf_pandas_msgpack.to_msgpack``;
            any other extension ``.xyz`` calls ``df.to_xyz``. A trailing
            ``.zip`` is stripped to find the real extension and sets
            ``compression="zip"``.
        index_column: If truthy, the index is reset and the resulting index
            column (``df.index.name`` or ``"index"``) is stored under this
            name as the last column of the written frame.
        **kwargs: Forwarded to the ``df.to_<ext>`` writer. Not forwarded to
            the msgpack writer. For ``.hdf`` a default ``key="key"`` is
            supplied when none is given.

    Returns:
        Whatever the underlying writer returned.

    Raises:
        AttributeError: If *df* has no ``to_<ext>`` method for the extension.
    """
    # Accept pathlib.Path and friends: the string slicing/concatenation below
    # only works on str.
    fn = os.fspath(fn)

    if index_column:
        prev_index_column = df.index.name or "index"
        df = df.reset_index()  # returns a new frame; the caller's df is untouched
        df[index_column] = df.pop(prev_index_column)

    ext = os.path.splitext(fn)[-1]
    # Exact match: a substring test would also fire for extensions such as
    # ".zipx", for which stripping four characters yields a bogus name.
    if ext == ".zip":
        ext = os.path.splitext(fn[:-4])[-1]
        kwargs["compression"] = "zip"
    kwargs["index"] = False
    if ext == ".hdf" and "key" not in kwargs:
        kwargs["key"] = "key"

    tmp_fn = fn + ".tmp"
    try:
        if ext == ".msgpack":
            from mbf_pandas_msgpack import to_msgpack

            res = to_msgpack(tmp_fn, df)
        else:
            res = getattr(df, f"to_{ext[1:]}")(tmp_fn, **kwargs)
    except BaseException:
        # Do not leave a partially written temporary file behind.
        if os.path.exists(tmp_fn):
            remove_path(tmp_fn)
        raise

    if os.path.exists(fn):
        remove_path(fn)
    rename_path(tmp_fn, fn)
    return res
import pandas as pd
import numpy as np
import datetime
import mbf_pandas_msgpack

# Sample frame with one column per value kind (int, float with NaN, str,
# datetime, tuple) used to produce a reference msgpack file.
# NOTE: datetime.fromtimestamp() returns naive local time, so the E_date
# values depend on the timezone of the machine running this script.
df = pd.DataFrame(
    {
        "A_int": [0, 1, 2],
        "C_float": [123.3, 1.8, np.nan],
        "D_str": ["a", "b", "c"],
        "E_date": [
            datetime.datetime.fromtimestamp(0),
            datetime.datetime.fromtimestamp(16e8),
            datetime.datetime.fromtimestamp(16e9),
        ],
        # NOTE(review): the middle entry is the plain string "sha, 24", not
        # the tuple ("sha", 24) -- looks like a typo, but it is left as-is
        # because changing it alters the generated sample. Confirm intent.
        "F_tuple": [("shu", 23), ("sha, 24"), ("shum", 25)],
    }
)

# The output file name embeds the installed pandas version. Open the file in
# a context manager so it is flushed and closed deterministically.
with open(f"samples/sample_pandas_{pd.__version__}.msgpack", "wb") as sample_file:
    mbf_pandas_msgpack.to_msgpack(sample_file, df)
def test_iterator_with_string_io(self):
    """Pack five random frames in memory and compare them while iterating.

    The frames are serialized together via ``to_msgpack(None, ...)``; the
    returned payload is read back with ``iterator=True`` and each item is
    compared with the frame written at the same position.
    """
    frames = [DataFrame(np.random.randn(10, 2)) for _ in range(5)]
    packed = to_msgpack(None, *frames)
    unpacked_stream = read_msgpack(packed, iterator=True)
    for position, unpacked in enumerate(unpacked_stream):
        tm.assert_frame_equal(unpacked, frames[position])
def encode_decode(self, x, compress=None, **kwargs):
    """Write *x* to a temporary msgpack file and return what is read back.

    ``compress`` is passed to the writer only; ``**kwargs`` are passed to
    both ``to_msgpack`` and ``read_msgpack``.
    """
    with ensure_clean(self.path) as tmp_path:
        to_msgpack(tmp_path, x, compress=compress, **kwargs)
        round_tripped = read_msgpack(tmp_path, **kwargs)
        return round_tripped