Beispiel #1
0
def test_unknown_compression_raises():
    with pytest.raises(ValueError):
        Codec.is_available('unknown')
    with pytest.raises(TypeError):
        Codec(None)
    with pytest.raises(ValueError):
        Codec('unknown')
Beispiel #2
0
def test_compress_decompress(compression):
    if not Codec.is_available(compression):
        pytest.skip("{} support is not built".format(compression))

    INPUT_SIZE = 10000
    test_data = (np.random.randint(0, 255, size=INPUT_SIZE)
                 .astype(np.uint8)
                 .tostring())
    test_buf = pa.py_buffer(test_data)

    compressed_buf = pa.compress(test_buf, codec=compression)
    compressed_bytes = pa.compress(test_data, codec=compression,
                                   asbytes=True)

    assert isinstance(compressed_bytes, bytes)

    decompressed_buf = pa.decompress(compressed_buf, INPUT_SIZE,
                                     codec=compression)
    decompressed_bytes = pa.decompress(compressed_bytes, INPUT_SIZE,
                                       codec=compression, asbytes=True)

    assert isinstance(decompressed_bytes, bytes)

    assert decompressed_buf.equals(test_buf)
    assert decompressed_bytes == test_data

    with pytest.raises(ValueError):
        pa.decompress(compressed_bytes, codec=compression)
Beispiel #3
0
def test_compression_detection(path, expected_compression):
    if not Codec.is_available(expected_compression):
        with pytest.raises(pa.lib.ArrowNotImplementedError):
            Codec.detect(path)
    else:
        codec = Codec.detect(path)
        assert isinstance(codec, Codec)
        assert codec.name == expected_compression
Beispiel #4
0
def test_output_stream_constructor(tmpdir):
    if not Codec.is_available("gzip"):
        pytest.skip("gzip support is not built")
    with pa.CompressedOutputStream(tmpdir / "ctor.gz", "gzip") as stream:
        stream.write(b"test")
    with (tmpdir / "ctor2.gz").open("wb") as f:
        with pa.CompressedOutputStream(f, "gzip") as stream:
            stream.write(b"test")
Beispiel #5
0
def test_compressed_roundtrip(compression):
    if not Codec.is_available(compression):
        pytest.skip("{} support is not built".format(compression))

    data = b"some test data\n" * 10 + b"eof\n"
    raw = pa.BufferOutputStream()
    with pa.CompressedOutputStream(raw, compression) as compressed:
        compressed.write(data)

    cdata = raw.getvalue()
    assert len(cdata) < len(data)
    raw = pa.BufferReader(cdata)
    with pa.CompressedInputStream(raw, compression) as compressed:
        got = compressed.read()
        assert got == data
Beispiel #6
0
def test_compressed_recordbatch_stream(compression):
    if not Codec.is_available(compression):
        pytest.skip("{} support is not built".format(compression))

    # ARROW-4836: roundtrip a RecordBatch through a compressed stream
    table = pa.Table.from_arrays([pa.array([1, 2, 3, 4, 5])], ['a'])
    raw = pa.BufferOutputStream()
    stream = pa.CompressedOutputStream(raw, compression)
    writer = pa.RecordBatchStreamWriter(stream, table.schema)
    writer.write_table(table, max_chunksize=3)
    writer.close()
    stream.close()  # Flush data
    buf = raw.getvalue()
    stream = pa.CompressedInputStream(pa.BufferReader(buf), compression)
    got_table = pa.RecordBatchStreamReader(stream).read_all()
    assert got_table == table
Beispiel #7
0
    'nopandas',
    'orc',
    'pandas',
    'parquet',
    'plasma',
    's3',
    'snappy',
    'tensorflow',
    'flight',
    'slow',
    'requires_testing_data',
    'zstd',
]

defaults = {
    'brotli': Codec.is_available('brotli'),
    'bz2': Codec.is_available('bz2'),
    'cython': False,
    'dataset': False,
    'fastparquet': False,
    'flight': False,
    'gandiva': False,
    'gdb': True,
    'gzip': Codec.is_available('gzip'),
    'hdfs': False,
    'hypothesis': False,
    'large_memory': False,
    'lz4': Codec.is_available('lz4'),
    'memory_leak': False,
    'nopandas': False,
    'orc': False,