def test_inmemory_write_after_closed():
    """Check that writing after ``get_result()`` raises ``IOError``."""
    stream = io.InMemoryOutputStream()
    stream.write(b'ok')

    # Per the test name, retrieving the result is what closes the stream.
    stream.get_result()

    with pytest.raises(IOError):
        stream.write(b'not ok')
def test_pandas_parquet_native_file_roundtrip(tmpdir):
    """Round-trip a DataFrame through Parquet using in-memory native files.

    The frame from ``_test_dataframe(10000)`` is converted to a table,
    written with ``version="2.0"`` to an in-memory output stream, read
    back through a ``BufferReader`` and compared with the original frame.
    ``tmpdir`` is accepted but not used by the body.
    """
    expected = _test_dataframe(10000)
    table = pa.Table.from_pandas(expected)

    sink = paio.InMemoryOutputStream()
    pq.write_table(table, sink, version="2.0")

    source = paio.BufferReader(sink.get_result())
    actual = pq.read_table(source).to_pandas()

    pdt.assert_frame_equal(expected, actual)
def test_nativefile_write_memoryview():
    """Check that ``write`` accepts a NumPy array and a ``bytearray``.

    The same two-byte payload is written once as an ``'S1'`` NumPy view
    and once as a ``bytearray``; the resulting buffer must contain the
    payload twice.
    """
    payload = b'ok'
    stream = io.InMemoryOutputStream()

    as_array = np.frombuffer(payload, dtype='S1')
    stream.write(as_array)
    stream.write(bytearray(payload))

    assert stream.get_result().to_pybytes() == payload * 2
def test_ipc_zero_copy_numpy():
    """Round-trip a one-column float frame through the IPC file helpers.

    A record batch built from the frame is written with ``write_file`` to
    an in-memory sink, read back with ``read_file`` from a
    ``BufferReader``, and the first batch is converted to pandas and
    compared with the original frame.
    """
    expected = pd.DataFrame({'foo': [1.5]})
    record_batch = A.RecordBatch.from_pandas(expected)

    out = arrow_io.InMemoryOutputStream()
    write_file(record_batch, out)

    source = arrow_io.BufferReader(out.get_result())
    first_batch = read_file(source)[0]

    # Wrap the converted batch in a DataFrame before comparing.
    actual = pd.DataFrame(first_batch.to_pandas())
    assert_frame_equal(expected, actual)
def test_memory_output_stream():
    """Check that repeated writes accumulate in the in-memory stream.

    A 10-byte chunk is written 1000 times; the result buffer must have
    the combined length and the exact concatenated contents.
    """
    chunk = b'dataabcdef'  # 10 bytes
    repeats = 1000

    stream = io.InMemoryOutputStream()
    for _ in range(repeats):
        stream.write(chunk)

    result = stream.get_result()
    expected = chunk * repeats

    assert len(result) == len(expected)
    assert result.to_pybytes() == expected
def test_pandas_parquet_native_file_roundtrip(tmpdir):
    """Round-trip a multi-dtype DataFrame through Parquet in memory.

    Builds a frame with one column per unsigned/signed integer width,
    both float widths and a boolean column, writes it with
    ``version="2.0"`` to an in-memory output stream, reads it back via a
    ``BufferReader`` and asserts that the frames are equal.
    ``tmpdir`` is accepted but not used by the body.
    """
    size = 10000
    # Fixed seed so the random boolean column is reproducible.
    np.random.seed(0)
    df = pd.DataFrame({
        'uint8': np.arange(size, dtype=np.uint8),
        'uint16': np.arange(size, dtype=np.uint16),
        'uint32': np.arange(size, dtype=np.uint32),
        'uint64': np.arange(size, dtype=np.uint64),
        # Previously built with np.int16, which left int8 untested and
        # made this column a duplicate of 'int16'.
        'int8': np.arange(size, dtype=np.int8),
        'int16': np.arange(size, dtype=np.int16),
        'int32': np.arange(size, dtype=np.int32),
        'int64': np.arange(size, dtype=np.int64),
        'float32': np.arange(size, dtype=np.float32),
        'float64': np.arange(size, dtype=np.float64),
        'bool': np.random.randn(size) > 0,
    })

    arrow_table = A.from_pandas_dataframe(df)

    imos = paio.InMemoryOutputStream()
    pq.write_table(arrow_table, imos, version="2.0")

    reader = paio.BufferReader(imos.get_result())
    df_read = pq.read_table(reader).to_pandas()

    pdt.assert_frame_equal(df, df_read)
def _get_sink(self):
    """Return a new ``arrow_io.InMemoryOutputStream`` to use as the sink."""
    sink = arrow_io.InMemoryOutputStream()
    return sink
def write_to_memory2(batch):
    """Write ``batch`` with ``write_file`` to an in-memory sink.

    Returns whatever the sink's ``get_result()`` yields after the write.
    """
    stream = arrow_io.InMemoryOutputStream()
    write_file(batch, stream)
    result = stream.get_result()
    return result