Ejemplo n.º 1
0
def test_inmemory_write_after_closed():
    """Check that writing after get_result() has been called raises IOError."""
    stream = pa.InMemoryOutputStream()
    stream.write(b'ok')
    # The result is fetched only for its effect on the stream; the value
    # itself is not inspected by this test.
    stream.get_result()

    # Any further write is expected to be rejected.
    with pytest.raises(IOError):
        stream.write(b'not ok')
Ejemplo n.º 2
0
def test_pandas_parquet_native_file_roundtrip(tmpdir):
    """Write a frame as Parquet into an in-memory stream and read it back.

    The frame produced by ``_test_dataframe(10000)`` is converted to an Arrow
    table, written with ``version="2.0"``, read back through a
    ``BufferReader`` and compared with the original frame.
    """
    expected = _test_dataframe(10000)

    out = pa.InMemoryOutputStream()
    pq.write_table(pa.Table.from_pandas(expected), out, version="2.0")

    # Read straight from the buffer that the writer produced.
    source = pa.BufferReader(out.get_result())
    actual = pq.read_table(source).to_pandas()

    tm.assert_frame_equal(expected, actual)
Ejemplo n.º 3
0
def test_nativefile_write_memoryview():
    """Check that write() accepts a NumPy array and a bytearray as input."""
    payload = b'ok'
    stream = pa.InMemoryOutputStream()

    # First write: the payload exposed as a NumPy array of 1-byte strings.
    stream.write(np.frombuffer(payload, dtype='S1'))
    # Second write: the same payload as a bytearray.
    stream.write(bytearray(payload))

    # Both writes must land back to back in the resulting buffer.
    assert stream.get_result().to_pybytes() == payload * 2
Ejemplo n.º 4
0
def test_ipc_zero_copy_numpy():
    """Round-trip a one-column float frame through write_file / read_file."""
    expected = pd.DataFrame({'foo': [1.5]})

    out = pa.InMemoryOutputStream()
    write_file(pa.RecordBatch.from_pandas(expected), out)

    # Read the batches back from the bytes that were just written.
    read_batches = read_file(pa.BufferReader(out.get_result()))

    # Only the first batch is compared against the input frame.
    actual = pd.DataFrame(read_batches[0].to_pandas())
    assert_frame_equal(expected, actual)
Ejemplo n.º 5
0
def test_memory_output_stream():
    """Check that repeated writes accumulate, in order, into one buffer."""
    chunk = b'dataabcdef'  # 10 bytes
    repeats = 1000

    stream = pa.InMemoryOutputStream()
    for _ in range(repeats):
        stream.write(chunk)
    result = stream.get_result()

    # The buffer must hold every chunk back to back.
    expected = chunk * repeats
    assert len(result) == len(expected)
    assert result.to_pybytes() == expected
Ejemplo n.º 6
0
def serialize_pandas(df):
    """Encode a pandas DataFrame as an in-memory record batch stream.

    The frame is converted to a single record batch, written through a
    ``RecordBatchStreamWriter`` into an in-memory sink, and the sink's
    result is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to serialize.

    Returns
    -------
    buf : buffer
        An object compatible with the buffer protocol
    """
    record_batch = pa.RecordBatch.from_pandas(df)
    out = pa.InMemoryOutputStream()

    stream_writer = pa.RecordBatchStreamWriter(out, record_batch.schema)
    stream_writer.write_batch(record_batch)
    # Close the writer before the sink's contents are taken.
    stream_writer.close()

    return out.get_result()
Ejemplo n.º 7
0
 def _get_sink(self):
     """Return a newly created in-memory output stream."""
     sink = pa.InMemoryOutputStream()
     return sink
Ejemplo n.º 8
0
# 2 -0.114206  0.073758
# 3  0.477138 -0.063724

# Size constants. 1 << 10 --> 1024: 1 Kilobyte
KILOBYTE = 1 << 10
MEGABYTE = KILOBYTE * KILOBYTE
DATA_SIZE = 1024 * MEGABYTE
NCOLS = 16

# Build one sample frame; generate_data is defined outside this excerpt.
df = generate_data(MEGABYTE, NCOLS)

# df --> batch
batch = pa.RecordBatch.from_pandas(df)

# Write batches in RAM
sink = pa.InMemoryOutputStream()
stream_writer = pa.StreamWriter(sink, batch.schema)

# The same batch is appended DATA_SIZE // MEGABYTE (= 1024) times.
for i in range(DATA_SIZE // MEGABYTE):
    stream_writer.write_batch(batch)

# Close the writer before taking the sink's result so the stream is
# finalized before anything reads it back.
stream_writer.close()

# Info
source = sink.get_result()
source.size

# Read back Arrow record batches in memory
reader = pa.StreamReader(source)
table = reader.read_all()

table
Ejemplo n.º 9
0
def test_inmemory_output_stream():
    """Check that InMemoryOutputStream warns and yields a BufferOutputStream."""
    # Construction is expected to emit a FutureWarning.
    with pytest.warns(FutureWarning):
        legacy_stream = pa.InMemoryOutputStream()
        assert isinstance(legacy_stream, pa.BufferOutputStream)