def test_formats_serdes():
    """Round-trip several kinds of object through ``FormatRegistry``.

    Each object is serialized starting from empty metadata, the format
    metadata returned by ``serialize`` is merged into that metadata, and the
    value produced by ``deserialize`` is compared with the original.
    """
    samples = (
        {'blah': 'foo'},
        b'blather',
        'blip',
    )

    for original in samples:
        # Every sample starts from its own, initially empty metadata dict.
        sample_meta = {}
        payload, fmt_meta = FormatRegistry.serialize(original, sample_meta)
        sample_meta.update(fmt_meta)
        restored = FormatRegistry.deserialize(payload, sample_meta)
        assert restored == original

    # Same round trip for a DataFrame, compared with ``equals`` instead of ``==``.
    frame_meta = {}
    source_frame = pd.DataFrame([[1, 2], [3, 4]])
    payload, fmt_meta = FormatRegistry.serialize(source_frame, frame_meta)
    frame_meta.update(fmt_meta)
    loaded_frame = FormatRegistry.deserialize(payload, frame_meta)

    # There is no nice way around this: when a header is used and the header
    # names are numeric, they come back from CSV as strings.  Comparing the
    # frames directly would then fail, so the column labels are cast back to
    # int before the comparison.
    loaded_frame.columns = loaded_frame.columns.astype(int, copy=False)
    assert source_frame.equals(loaded_frame)
def test_formats_csv_roundtrip():
    """Check that CSV bytes survive deserialize -> serialize -> deserialize.

    The same input bytes are pushed through ``FormatRegistry`` with four
    metadata variants (defaults, ``use_header``, ``use_index``, and both).
    For each variant the re-serialized bytes must equal the input bytes and
    the two deserialized objects must compare equal via ``equals``.
    """
    test_data = b'9,2,5\n7,2,6\n1,0,1\n'

    def check_roundtrip(meta):
        # One full round trip for a single metadata variant.  ``meta`` is
        # updated in place with the format metadata returned by ``serialize``
        # before the second ``deserialize`` call.
        df1 = FormatRegistry.deserialize(test_data, meta)
        serialized, format_meta = FormatRegistry.serialize(df1, meta)
        meta.update(format_meta)
        df2 = FormatRegistry.deserialize(serialized, meta)

        assert test_data == serialized
        assert df1.equals(df2)

    # roundtrip defaults.
    check_roundtrip({'format': {'name': 'csv'}})

    # interpret first row as header
    check_roundtrip({'format': {'name': 'csv', 'opts': {'use_header': True}}})

    # interpret first column as index
    check_roundtrip({'format': {'name': 'csv', 'opts': {'use_index': True}}})

    # interpret first row as header, and first column as index
    check_roundtrip({
        'format': {
            'name': 'csv',
            'opts': {
                'use_index': True,
                'use_header': True,
            },
        },
    })
def test_formats_csv_read():
    """Compare the ``data/csv.csv`` fixture with a known CSV byte string.

    The fixture file next to this module and the literal reference bytes are
    both deserialized with the same ``csv`` metadata; the results must be
    equal, and serializing the fixture's result must reproduce the reference
    bytes.
    """
    csv_meta = {'format': {'name': 'csv'}}
    reference_bytes = b'a,b,c,d\n1,2,3,4\n5,6,7,8\n'
    fixture_path = pathlib.Path(__file__).parent.joinpath('data', 'csv.csv')

    reference_frame = FormatRegistry.deserialize(reference_bytes, csv_meta)
    fixture_frame = FormatRegistry.deserialize(fixture_path.read_bytes(), csv_meta)
    assert fixture_frame.equals(reference_frame)

    # ``serialize`` returns a sequence whose first item is the serialized data.
    reserialized = FormatRegistry.serialize(fixture_frame, csv_meta)[0]
    assert reference_bytes == reserialized