Exemple #1
0
def test_serialize_pandas_no_preserve_index():
    """Round-trip a frame with ``preserve_index`` set to False, then True.

    With ``preserve_index=False`` the result is compared against a frame
    built without the custom index; with ``preserve_index=True`` it is
    compared against the original frame.
    """
    df = pd.DataFrame({'a': [1, 2, 3]}, index=[1, 2, 3])
    expected = pd.DataFrame({'a': [1, 2, 3]})

    # Index dropped on serialization.
    without_index = pa.deserialize_pandas(
        pa.serialize_pandas(df, preserve_index=False)
    )
    assert_frame_equal(without_index, expected)

    # Index kept on serialization.
    with_index = pa.deserialize_pandas(
        pa.serialize_pandas(df, preserve_index=True)
    )
    assert_frame_equal(with_index, df)
Exemple #2
0
 def serialize(obj):
     """Serialize ``obj`` to bytes.

     * ``bytes`` input is returned unchanged.
     * A ``pd.DataFrame`` with more than 30000 cells (rows * columns) is
       serialized with ``pa.serialize_pandas`` and converted to ``bytes``.
     * Anything else is pickled with the highest available protocol.

     Returns:
         The serialized payload as ``bytes``.
     """
     if isinstance(obj, bytes):
         return obj
     # ``DataFrame.size`` is rows * columns. It replaces ``np.product``,
     # which was removed in NumPy 2.0 and made every DataFrame input fail.
     if isinstance(obj, pd.DataFrame) and obj.size > 30000:
         pa_buffer = pa.serialize_pandas(obj)
         return pa_buffer.to_pybytes()
     return pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
Exemple #3
0
def test_pandas_serialize_round_trip_nthreads():
    """Round-trip a named-index frame, deserializing with ``nthreads=2``."""
    data = {'foo': [1.5, 1.6, 1.7], 'bar': list('abc')}
    df = pd.DataFrame(
        data,
        index=pd.Index([1, 2, 3], name='my_index'),
        columns=['foo', 'bar'],
    )
    serialized = pa.serialize_pandas(df)
    round_tripped = pa.deserialize_pandas(serialized, nthreads=2)
    assert_frame_equal(round_tripped, df)
Exemple #4
0
def test_pandas_serialize_round_trip_multi_index():
    """Round-trip a frame whose index is a two-level ``MultiIndex``.

    The first level is named ``'level_1'``; the second level has no name.
    """
    named_level = pd.Index([1, 2, 3], name='level_1')
    unnamed_level = pd.Index(list('def'), name=None)
    multi_index = pd.MultiIndex.from_arrays([named_level, unnamed_level])

    data = {'foo': [1.5, 1.6, 1.7], 'bar': list('abc')}
    df = pd.DataFrame(data, index=multi_index, columns=['foo', 'bar'])

    round_tripped = pa.deserialize_pandas(pa.serialize_pandas(df))
    assert_frame_equal(round_tripped, df)
 def time_serialize_pandas(self):
     """Serialize ``self.df`` with ``pa.serialize_pandas``; the result is discarded."""
     frame = self.df
     pa.serialize_pandas(frame)
Exemple #6
0
def test_pandas_serialize_round_trip_not_string_columns():
    """Round-trip a frame built from row tuples with no explicit column names."""
    rows = list(zip([1.5, 1.6, 1.7], 'abc'))
    df = pd.DataFrame(rows)
    round_tripped = pa.deserialize_pandas(pa.serialize_pandas(df))
    assert_frame_equal(round_tripped, df)
Exemple #7
0
def _check_serialize_pandas_round_trip(df, nthreads=1):
    """Assert that ``df`` survives a serialize/deserialize round trip.

    ``nthreads`` is forwarded to both ``pa.serialize_pandas`` and
    ``pa.deserialize_pandas``.
    """
    serialized = pa.serialize_pandas(df, nthreads=nthreads)
    round_tripped = pa.deserialize_pandas(serialized, nthreads=nthreads)
    assert_frame_equal(round_tripped, df)
Exemple #8
0
def _check_serialize_pandas_round_trip(df, use_threads=False):
    """Assert that ``df`` survives a serialize/deserialize round trip.

    Serialization uses two threads when ``use_threads`` is truthy and one
    otherwise; ``use_threads`` itself is forwarded to deserialization.
    """
    if use_threads:
        nthreads = 2
    else:
        nthreads = 1
    serialized = pa.serialize_pandas(df, nthreads=nthreads)
    round_tripped = pa.deserialize_pandas(serialized, use_threads=use_threads)
    assert_frame_equal(round_tripped, df)
 def _serialize_pandas_series(obj):
     """Wrap ``obj`` in a one-column DataFrame keyed by ``obj.name`` and serialize it."""
     frame = pd.DataFrame({obj.name: obj})
     return serialize_pandas(frame)
Exemple #10
0
 def _serialize_pandas_dataframe(obj):
     """Serialize ``obj`` with ``serialize_pandas`` and return ``to_pybytes()`` of the result."""
     buf = serialize_pandas(obj)
     return buf.to_pybytes()
Exemple #11
0
 def _serialize_pandas_series(obj):
     """Serialize ``obj`` by first placing it in a single-column DataFrame.

     The column label is ``obj.name``.
     """
     single_column = {obj.name: obj}
     as_frame = pd.DataFrame(single_column)
     return serialize_pandas(as_frame)
Exemple #12
0
 def _serialize_pandas_dataframe(obj):
     """Return the result of ``serialize_pandas(obj)`` unchanged."""
     serialized = serialize_pandas(obj)
     return serialized
Exemple #13
0
def _check_serialize_pandas_round_trip(df, use_threads=False):
    """Check that serializing then deserializing ``df`` yields an equal frame.

    ``pa.serialize_pandas`` gets ``nthreads=2`` when ``use_threads`` is
    truthy, else ``nthreads=1``; ``pa.deserialize_pandas`` receives
    ``use_threads`` directly.
    """
    thread_count = 2 if use_threads else 1
    payload = pa.serialize_pandas(df, nthreads=thread_count)
    restored = pa.deserialize_pandas(payload, use_threads=use_threads)
    assert_frame_equal(restored, df)
 def setup(self):
     """Build a one-column random frame and cache its serialized form.

     Stores the frame on ``self.df`` and the output of
     ``pa.serialize_pandas`` on ``self.serialized``.
     """
     # 10 million rows
     row_count = 10 ** 7
     values = np.random.randn(row_count)
     self.df = pd.DataFrame({'data': values})
     self.serialized = pa.serialize_pandas(self.df)
 def _serialize_pandas_dataframe(obj):
     """Serialize ``obj`` via ``serialize_pandas`` and hand back its result as-is."""
     result = serialize_pandas(obj)
     return result
Exemple #16
0
def test_pandas_serialize_round_trip_not_string_columns():
    """Round-trip a frame created without explicit column labels."""
    floats = [1.5, 1.6, 1.7]
    letters = 'abc'
    df = pd.DataFrame(list(zip(floats, letters)))

    payload = pa.serialize_pandas(df)
    restored = pa.deserialize_pandas(payload)
    assert_frame_equal(restored, df)
Exemple #17
0
 def _serialize_pandas_series(obj):
     """Serialize ``obj`` to bytes by way of a one-column DataFrame.

     The column is labelled ``obj.name``; the serialized buffer is
     converted with ``to_pybytes()``.
     """
     # TODO: serializing Series without extra copy
     frame = pd.DataFrame({obj.name: obj})
     buf = serialize_pandas(frame)
     return buf.to_pybytes()