Beispiel #1
0
def test_formats_serdes():
    """Round-trip plain objects and a DataFrame through ``FormatRegistry``.

    Each value is serialized starting from an empty metadata dict, the
    metadata returned by ``serialize`` is merged in, and the deserialized
    result is compared against the original value.
    """
    samples = (
        {'blah': 'foo'},
        b'blather',
        'blip',
    )

    for original in samples:
        # Every sample gets its own empty metadata dict.
        sample_meta = {}
        payload, extra_meta = FormatRegistry.serialize(original, sample_meta)
        sample_meta.update(extra_meta)
        restored = FormatRegistry.deserialize(payload, sample_meta)
        assert restored == original

    frame_meta = {}
    source_frame = pd.DataFrame([[1, 2], [3, 4]])
    payload, extra_meta = FormatRegistry.serialize(source_frame, frame_meta)
    frame_meta.update(extra_meta)
    loaded_frame = FormatRegistry.deserialize(payload, frame_meta)

    # There is no clean way around this: when a header is used and the header
    # names are numeric, they come back from CSV as strings.  That would make
    # the comparison fail, so the column labels are cast back to int first.
    loaded_frame.columns = loaded_frame.columns.astype(int, copy=False)

    assert source_frame.equals(loaded_frame)
Beispiel #2
0
def test_formats_csv_roundtrip():
    """Check that CSV bytes survive deserialize -> serialize -> deserialize.

    The same input is round-tripped once per CSV option set: defaults,
    ``use_header``, ``use_index``, and both together.  For every option set
    the re-serialized bytes must equal the input bytes, and the DataFrame
    obtained from the input must equal the one obtained from the
    re-serialized bytes.
    """
    test_data = b'9,2,5\n7,2,6\n1,0,1\n'

    # ``None`` means "no 'opts' key at all", i.e. the format defaults.
    option_sets = [
        None,
        # interpret first row as header
        {'use_header': True},
        # interpret first column as index
        {'use_index': True},
        # interpret first row as header, and first column as index
        {'use_index': True, 'use_header': True},
    ]

    for opts in option_sets:
        meta = {'format': {'name': 'csv'}}
        if opts is not None:
            meta['format']['opts'] = opts

        df1 = FormatRegistry.deserialize(test_data, meta)
        # Named ``serialized`` rather than ``bin`` to avoid shadowing the builtin.
        serialized, format_meta = FormatRegistry.serialize(df1, meta)
        meta.update(format_meta)
        df2 = FormatRegistry.deserialize(serialized, meta)

        # Include the active options so a failure identifies the broken case.
        assert test_data == serialized, 'bytes changed with opts=%r' % (opts,)
        assert df1.equals(df2), 'frames differ with opts=%r' % (opts,)
Beispiel #3
0
def test_formats_csv_read():
    """Compare the ``data/csv.csv`` fixture against known CSV bytes.

    The fixture file next to this module is deserialized with the default
    CSV format and must equal the DataFrame deserialized from the expected
    bytes; serializing it again must reproduce those bytes.
    """
    fixture_path = pathlib.Path(__file__).parent.joinpath('data', 'csv.csv')
    csv_meta = {'format': {'name': 'csv'}}
    expected_bytes = b'a,b,c,d\n1,2,3,4\n5,6,7,8\n'

    reference_frame = FormatRegistry.deserialize(expected_bytes, csv_meta)
    raw_fixture = fixture_path.read_bytes()
    fixture_frame = FormatRegistry.deserialize(raw_fixture, csv_meta)

    assert fixture_frame.equals(reference_frame)

    # serialize() returns the payload first; only that element is compared.
    serialized = FormatRegistry.serialize(fixture_frame, csv_meta)
    assert expected_bytes == serialized[0]