def test_output_stream_constructor(tmpdir):
    if not Codec.is_available("gzip"):
        pytest.skip("gzip support is not built")
    with pa.CompressedOutputStream(tmpdir / "ctor.gz", "gzip") as stream:
        stream.write(b"test")
    with (tmpdir / "ctor2.gz").open("wb") as f:
        with pa.CompressedOutputStream(f, "gzip") as stream:
            stream.write(b"test")
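
The constructor test only exercises the write side. A minimal read-back sketch (hypothetical, not part of the original suite), relying on pa.CompressedInputStream also accepting a path or an open file object:

def test_input_stream_constructor(tmpdir):
    # Hypothetical companion sketch: write a small gzip file, then read it
    # back both from a path and from an open file object.
    if not Codec.is_available("gzip"):
        pytest.skip("gzip support is not built")
    path = tmpdir / "ctor.gz"
    with pa.CompressedOutputStream(path, "gzip") as stream:
        stream.write(b"test")
    with pa.CompressedInputStream(path, "gzip") as stream:
        assert stream.read() == b"test"
    with path.open("rb") as f:
        with pa.CompressedInputStream(f, "gzip") as stream:
            assert stream.read() == b"test"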

def test_compress_stream(batch):
    raw = pa.BufferOutputStream()
    with pa.CompressedOutputStream(raw, "lz4") as compressed:
        pa.serialize(batch).write_to(compressed)
    cdata = raw.getvalue()
    raw = pa.BufferReader(cdata)
    with pa.CompressedInputStream(raw, "lz4") as compressed:
        tmp = pa.deserialize(compressed.read())
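
pa.serialize and pa.deserialize are legacy APIs; a hedged sketch of the same lz4 round trip written against the Arrow IPC stream API instead (the name roundtrip_batch_lz4 and the assumption that batch is a pa.RecordBatch are mine):

def roundtrip_batch_lz4(batch):
    # Assumes `batch` is a pa.RecordBatch; writes it through an lz4-compressed
    # IPC stream and reads it back as a table.
    raw = pa.BufferOutputStream()
    with pa.CompressedOutputStream(raw, "lz4") as compressed:
        with pa.ipc.new_stream(compressed, batch.schema) as writer:
            writer.write_batch(batch)
    cdata = raw.getvalue()
    with pa.CompressedInputStream(pa.BufferReader(cdata), "lz4") as compressed:
        return pa.ipc.open_stream(compressed).read_all()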

def make_compressed_output(data, fn, compression):
    raw = pa.BufferOutputStream()
    with pa.CompressedOutputStream(raw, compression) as compressed:
        assert not compressed.closed
        assert not compressed.readable()
        assert compressed.writable()
        assert not compressed.seekable()
        compressed.write(data)
    assert compressed.closed
    assert raw.closed
    with open(fn, "wb") as f:
        f.write(raw.getvalue())
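
A plausible read-side counterpart (a sketch; check_compressed_input is an assumed name), reading back the file written by make_compressed_output and mirroring its property checks:

def check_compressed_input(data, fn, compression):
    # Assumed read-side counterpart: the property checks mirror the write
    # side, and closing the wrapper is expected to close the raw file too.
    raw = pa.OSFile(fn, mode="rb")
    with pa.CompressedInputStream(raw, compression) as compressed:
        assert not compressed.closed
        assert compressed.readable()
        assert not compressed.writable()
        assert not compressed.seekable()
        got = compressed.read()
        assert got == data
    assert compressed.closed
    assert raw.closed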

def serialize_pyarrow(data: bytes, codec: str):
    """
    Serialize an object and compress it with a specific codec.

    Returns the serialized, compressed bytes in a pyarrow.Buffer. The caller
    is responsible for writing the returned bytes to a file, if necessary.
    Should be used in conjunction with `deserialize_pyarrow`.
    """
    raw = pa.BufferOutputStream()
    with pa.CompressedOutputStream(raw, compression=codec) as compressed:
        pa.serialize_to(data, compressed, context=serialization_context)
    return raw.getvalue()
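
The docstring points at deserialize_pyarrow; a minimal sketch of what that inverse could look like, assuming the same codec and the module-level serialization_context used on the write side:

def deserialize_pyarrow(cdata, codec: str):
    """
    Sketch of the inverse of `serialize_pyarrow` (assumed implementation):
    decompress with the same codec, then deserialize with the same context.
    """
    raw = pa.BufferReader(cdata)
    with pa.CompressedInputStream(raw, compression=codec) as compressed:
        return pa.deserialize(compressed.read(), context=serialization_context)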

def test_compressed_roundtrip(compression):
    if not Codec.is_available(compression):
        pytest.skip("{} support is not built".format(compression))
    data = b"some test data\n" * 10 + b"eof\n"
    raw = pa.BufferOutputStream()
    with pa.CompressedOutputStream(raw, compression) as compressed:
        compressed.write(data)
    cdata = raw.getvalue()
    assert len(cdata) < len(data)
    raw = pa.BufferReader(cdata)
    with pa.CompressedInputStream(raw, compression) as compressed:
        got = compressed.read()
        assert got == data
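
For comparison, a non-streaming sketch of the same round trip with the one-shot Codec API; the helper name and the exact keyword usage are assumptions to verify against the installed pyarrow version:

def one_shot_roundtrip(data, compression):
    # Non-streaming sketch using the Codec API directly; decompressed_size is
    # supplied because some codecs do not record the original length.
    codec = Codec(compression)
    cdata = codec.compress(data)
    return codec.decompress(cdata, decompressed_size=len(data), asbytes=True)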

def test_compressed_recordbatch_stream(compression):
    if not Codec.is_available(compression):
        pytest.skip("{} support is not built".format(compression))
    # ARROW-4836: roundtrip a RecordBatch through a compressed stream
    table = pa.Table.from_arrays([pa.array([1, 2, 3, 4, 5])], ['a'])
    raw = pa.BufferOutputStream()
    stream = pa.CompressedOutputStream(raw, compression)
    writer = pa.RecordBatchStreamWriter(stream, table.schema)
    writer.write_table(table, max_chunksize=3)
    writer.close()
    stream.close()  # Flush data
    buf = raw.getvalue()
    stream = pa.CompressedInputStream(pa.BufferReader(buf), compression)
    got_table = pa.RecordBatchStreamReader(stream).read_all()
    assert got_table == table
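
An illustrative variant (hypothetical helper name) that leans on context managers so the writer and the compressed stream are flushed and closed automatically, replacing the explicit close() calls above:

def roundtrip_table_compressed(table, compression):
    # Context managers flush and close the writer and the compressed stream
    # automatically, so no explicit close() calls are needed.
    raw = pa.BufferOutputStream()
    with pa.CompressedOutputStream(raw, compression) as stream:
        with pa.RecordBatchStreamWriter(stream, table.schema) as writer:
            writer.write_table(table, max_chunksize=3)
    buf = raw.getvalue()
    with pa.CompressedInputStream(pa.BufferReader(buf), compression) as stream:
        return pa.RecordBatchStreamReader(stream).read_all()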

def test_compressed_roundtrip(compression):
    data = b"some test data\n" * 10 + b"eof\n"
    raw = pa.BufferOutputStream()
    try:
        with pa.CompressedOutputStream(raw, compression) as compressed:
            compressed.write(data)
    except NotImplementedError as e:
        if compression == "bz2":
            pytest.skip(str(e))
        else:
            raise
    cdata = raw.getvalue()
    assert len(cdata) < len(data)
    raw = pa.BufferReader(cdata)
    with pa.CompressedInputStream(raw, compression) as compressed:
        got = compressed.read()
        assert got == data

def test_compressed_recordbatch_stream(compression):
    # ARROW-4836: roundtrip a RecordBatch through a compressed stream
    table = pa.Table.from_arrays([pa.array([1, 2, 3, 4, 5])], ['a'])
    raw = pa.BufferOutputStream()
    try:
        stream = pa.CompressedOutputStream(raw, compression)
    except NotImplementedError as e:
        if compression == "bz2":
            pytest.skip(str(e))
        else:
            raise
    writer = pa.RecordBatchStreamWriter(stream, table.schema)
    writer.write_table(table, max_chunksize=3)
    writer.close()
    stream.close()  # Flush data
    buf = raw.getvalue()
    stream = pa.CompressedInputStream(pa.BufferReader(buf), compression)
    got_table = pa.RecordBatchStreamReader(stream).read_all()
    assert got_table == table

def write_data_as_arrow(data, schema, max_size):
    if isinstance(data, pyarrow.Table):
        data = data.to_batches(OUT_BATCH_SIZE)
    truncated = False
    buf = pyarrow.BufferOutputStream()
    with pyarrow.CompressedOutputStream(buf, "gzip") as sink:
        with pyarrow.ipc.new_file(sink, schema) as writer:
            for batch in data:
                batch_size = pyarrow.ipc.get_record_batch_size(batch)
                if ((max_size is not None and
                     sink.tell() + batch_size > max_size) or
                        # See a similar comment in GzipOutputBuffer.write().
                        buf.tell() + batch_size > MAX_OUT):
                    truncated = True
                    break
                writer.write(batch)
    return memoryview(buf.getvalue()), truncated
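
A hypothetical reader for the payload produced above: because the Arrow file format needs a seekable source, the gzip stream is decompressed into a buffer before pyarrow.ipc.open_file is called:

def read_data_as_arrow(buf):
    # Hypothetical counterpart to write_data_as_arrow: the Arrow file format
    # needs a seekable source, so the gzip stream is fully decompressed into
    # a buffer before the IPC file is opened.
    raw = pyarrow.BufferReader(buf)
    with pyarrow.CompressedInputStream(raw, "gzip") as source:
        decompressed = source.read_buffer()
    return pyarrow.ipc.open_file(decompressed).read_all()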