Example 1
def test_output_stream_constructor(tmpdir):
    if not Codec.is_available("gzip"):
        pytest.skip("gzip support is not built")
    with pa.CompressedOutputStream(tmpdir / "ctor.gz", "gzip") as stream:
        stream.write(b"test")
    with (tmpdir / "ctor2.gz").open("wb") as f:
        with pa.CompressedOutputStream(f, "gzip") as stream:
            stream.write(b"test")
Example 2
def test_compress_stream(batch):
    # Round-trip a serialized record batch through an LZ4-compressed stream.
    # (pa.serialize/pa.deserialize are PyArrow's legacy serialization API,
    # deprecated since PyArrow 0.17.)
    raw = pa.BufferOutputStream()
    with pa.CompressedOutputStream(raw, "lz4") as compressed:
        pa.serialize(batch).write_to(compressed)
    cdata = raw.getvalue()
    raw = pa.BufferReader(cdata)
    with pa.CompressedInputStream(raw, "lz4") as compressed:
        tmp = pa.deserialize(compressed.read())
Example 3
def make_compressed_output(data, fn, compression):
    # Compress `data` into an in-memory buffer, verifying the stream is
    # write-only while open and closed afterwards, then write the
    # compressed bytes to the file `fn`.
    raw = pa.BufferOutputStream()
    with pa.CompressedOutputStream(raw, compression) as compressed:
        assert not compressed.closed
        assert not compressed.readable()
        assert compressed.writable()
        assert not compressed.seekable()
        compressed.write(data)
    assert compressed.closed
    assert raw.closed
    with open(fn, "wb") as f:
        f.write(raw.getvalue())
Example 4
def serialize_pyarrow(data: bytes, codec: str):
    """
    Serialize an object and compress with a specific codec. Returns the
    serialized, compressed bytes in a pyarrow.Buffer. The caller is
    responsible for reading the returned bytes into a file, if necessary.

    Should be used in conjunction with `deserialize_pyarrow`.
    """
    raw = pa.BufferOutputStream()
    with pa.CompressedOutputStream(raw, compression=codec) as compressed:
        pa.serialize_to(data, compressed, context=serialization_context)
    return raw.getvalue()
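The docstring above references `deserialize_pyarrow`, which is not shown in these examples. A minimal counterpart sketch, assuming the same module-level `serialization_context` and the same legacy pa.deserialize API, could look like this:

def deserialize_pyarrow(buf, codec: str):
    """
    Decompress `buf` with the given codec and deserialize the result.

    Counterpart sketch for `serialize_pyarrow`; not the original code.
    """
    raw = pa.BufferReader(buf)
    with pa.CompressedInputStream(raw, compression=codec) as compressed:
        return pa.deserialize(compressed.read(), context=serialization_context)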
Example 5
def test_compressed_roundtrip(compression):
    if not Codec.is_available(compression):
        pytest.skip("{} support is not built".format(compression))

    data = b"some test data\n" * 10 + b"eof\n"
    raw = pa.BufferOutputStream()
    with pa.CompressedOutputStream(raw, compression) as compressed:
        compressed.write(data)

    cdata = raw.getvalue()
    assert len(cdata) < len(data)
    raw = pa.BufferReader(cdata)
    with pa.CompressedInputStream(raw, compression) as compressed:
        got = compressed.read()
        assert got == data
Example 6
def test_compressed_recordbatch_stream(compression):
    if not Codec.is_available(compression):
        pytest.skip("{} support is not built".format(compression))

    # ARROW-4836: roundtrip a RecordBatch through a compressed stream
    table = pa.Table.from_arrays([pa.array([1, 2, 3, 4, 5])], ['a'])
    raw = pa.BufferOutputStream()
    stream = pa.CompressedOutputStream(raw, compression)
    writer = pa.RecordBatchStreamWriter(stream, table.schema)
    writer.write_table(table, max_chunksize=3)
    writer.close()
    stream.close()  # Flush data
    buf = raw.getvalue()
    stream = pa.CompressedInputStream(pa.BufferReader(buf), compression)
    got_table = pa.RecordBatchStreamReader(stream).read_all()
    assert got_table == table
Example 7
def test_compressed_roundtrip(compression):
    data = b"some test data\n" * 10 + b"eof\n"
    raw = pa.BufferOutputStream()
    try:
        with pa.CompressedOutputStream(raw, compression) as compressed:
            compressed.write(data)
    except NotImplementedError as e:
        if compression == "bz2":
            pytest.skip(str(e))
        else:
            raise
    cdata = raw.getvalue()
    assert len(cdata) < len(data)
    raw = pa.BufferReader(cdata)
    with pa.CompressedInputStream(raw, compression) as compressed:
        got = compressed.read()
        assert got == data
Example 8
def test_compressed_recordbatch_stream(compression):
    # ARROW-4836: roundtrip a RecordBatch through a compressed stream
    table = pa.Table.from_arrays([pa.array([1, 2, 3, 4, 5])], ['a'])
    raw = pa.BufferOutputStream()
    try:
        stream = pa.CompressedOutputStream(raw, compression)
    except NotImplementedError as e:
        if compression == "bz2":
            pytest.skip(str(e))
        else:
            raise
    writer = pa.RecordBatchStreamWriter(stream, table.schema)
    writer.write_table(table, max_chunksize=3)
    writer.close()
    stream.close()  # Flush data
    buf = raw.getvalue()
    stream = pa.CompressedInputStream(pa.BufferReader(buf), compression)
    got_table = pa.RecordBatchStreamReader(stream).read_all()
    assert got_table == table
Example 9
def write_data_as_arrow(data, schema, max_size):
    if isinstance(data, pyarrow.Table):
        data = data.to_batches(OUT_BATCH_SIZE)

    truncated = False
    buf = pyarrow.BufferOutputStream()
    with pyarrow.CompressedOutputStream(buf, "gzip") as sink:
        with pyarrow.ipc.new_file(sink, schema) as writer:
            for batch in data:
                batch_size = pyarrow.ipc.get_record_batch_size(batch)
                if ((max_size is not None
                     and sink.tell() + batch_size > max_size) or
                        # See a similar comment in GzipOutputBuffer.write().
                        buf.tell() + batch_size > MAX_OUT):
                    truncated = True
                    break
                writer.write(batch)

    return memoryview(buf.getvalue()), truncated
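Reading back a payload produced by `write_data_as_arrow` takes one extra step: the Arrow IPC file format needs a seekable source, and CompressedInputStream is not seekable, so the payload must be fully decompressed first. A minimal sketch (the helper name `read_data_as_arrow` is hypothetical; the "gzip" codec matches the writer above):

def read_data_as_arrow(payload):
    # Decompress the gzip payload into an in-memory Arrow buffer.
    raw = pyarrow.BufferReader(payload)
    with pyarrow.CompressedInputStream(raw, "gzip") as stream:
        buf = stream.read_buffer()
    # The decompressed buffer is seekable, so the IPC file reader works.
    reader = pyarrow.ipc.open_file(buf)
    return reader.read_all()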