Beispiel #1
0
def test_empty(blocksize):
    """A buffer over an empty raw stream has size 0 and reads nothing.

    The final assertion checks that the recording wrapper saw no reads
    at all on the underlying stream.
    """
    recorder = _ReadRecordWrapper(io.BytesIO())
    buf = BlockBuffer(recorder, blocksize)

    assert buf.size == 0
    assert buf.read() == b""
    # Nothing to fetch, so the wrapped stream must not have been read.
    assert recorder.records == []
Beispiel #2
0
def test_caching_chunks2(raw):
    """A full read after a partial one only fetches the missing blocks.

    ``raw.records`` entries look like ``(offset, length)`` pairs of reads
    issued against the raw stream -- TODO confirm against the fixture.
    """
    buf = BlockBuffer(raw, 2)
    after_partial_read = [(2, 2)]

    # Reading one byte at position 3 fetches only the block starting at 2.
    buf.seek(3)
    buf.read(1)
    assert raw.records == after_partial_read

    # Reading everything from the start adds the two blocks not yet fetched;
    # the block at offset 2 is not requested a second time.
    buf.seek(0)
    buf.read()
    assert raw.records == after_partial_read + [(0, 2), (4, 2)]
Beispiel #3
0
def test_giga():
    """Reading the head and tail of a huge stream touches only two blocks."""
    block_len = 4 * 1024**2  # 4 MiB
    total_len = 100 * 1024**3  # 100 GiB
    recorder = _ReadRecordWrapper(_ZeroFile(total_len))
    buf = BlockBuffer(recorder, block_len)
    ten_zeros = b"\0" * 10

    assert buf.size == total_len

    # First ten bytes of the stream.
    assert buf.read(10) == ten_zeros

    # Last ten bytes: whence=2 seeks relative to the end of the stream.
    assert buf.seek(-10, 2) == total_len - 10
    assert buf.read(10) == ten_zeros

    # Exactly one read for the first block and one for the last block.
    first_block = (0, block_len)
    last_block = (total_len - block_len, block_len)
    assert recorder.records == [first_block, last_block]
Beispiel #4
0
def test_caching_reuse(raw):
    """A second read inside an already fetched block issues no new raw read."""
    buf = BlockBuffer(raw, 3)
    single_fetch = [(0, 3)]

    # First access fetches the block starting at offset 0.
    buf.read(1)
    assert raw.records == single_fetch

    # Rewinding and reading inside the same block leaves the record unchanged.
    buf.seek(0)
    buf.read(2)
    assert raw.records == single_fetch
Beispiel #5
0
def get_physical_partition_stats(metapartitions, store):
    """
    Collect file, row and size statistics for one physical partition.

    .. hint::
        To get the metapartitions pre-aligned, use ``concat_partitions_on_primary_index=True`` during dispatch.

    Parameters
    ----------
    metapartitions: Iterable[kartothek.io_components.metapartition.MetaPartition]
        Metapartitions that all belong to the same physical partition.
    store: Union[simplekv.KeyValueStore, Callable[[], simplekv.KeyValueStore]]
        KV store, or a zero-argument factory that returns one.

    Returns
    -------
    stats: Dict[str, int]
        Mapping with the keys ``"blobsize"``, ``"files"``, ``"partitions"``
        (always 1) and ``"rows"``, summed over every file of every given
        metapartition.
    """
    # Resolve a store factory into the actual store once, up front.
    kv_store = store() if callable(store) else store

    stats = {"blobsize": 0, "files": 0, "partitions": 1, "rows": 0}
    for metapartition in metapartitions:
        for key in metapartition.files.values():
            stats["files"] += 1
            buf = BlockBuffer(kv_store.open(key))
            try:
                # The row count is taken from the Parquet file metadata.
                stats["rows"] += pq.ParquetFile(buf).metadata.num_rows
                stats["blobsize"] += buf.size
            finally:
                buf.close()

    return stats
Beispiel #6
0
def test_init_fails_not_readable():
    """Constructing a BlockBuffer over a non-readable stream raises ValueError."""

    def _never_readable():
        return False

    unreadable = io.BytesIO()
    # Shadow the method on this instance so the stream reports itself unreadable.
    unreadable.readable = _never_readable

    with pytest.raises(ValueError, match="raw must be readable"):
        BlockBuffer(unreadable)
Beispiel #7
0
def example_buffer(raw, blocksize):
    """Build a BlockBuffer over ``raw`` using the given ``blocksize``."""
    buf = BlockBuffer(raw, blocksize)
    return buf
Beispiel #8
0
def test_real_file(tmpdir, blocksize):
    """Exercise BlockBuffer on top of a real on-disk file object.

    Writes a six-byte file, wraps a read handle in a BlockBuffer and checks
    size, seek/tell/read behavior and that closing twice is harmless.
    """
    path = tmpdir.join("test_real_file.bin").strpath
    with open(path, "wb") as fp:
        fp.write(b"foxbar")

    real_file = open(path, "rb")
    try:
        b = BlockBuffer(real_file, blocksize)

        assert not b.closed

        assert b.size == 6
        assert b.seekable() is True
        assert b.readable() is True
        assert b.tell() == 0
        assert b.seek(1) == 1
        assert b.read() == b"oxbar"

        # final close
        b.close()

        # closing twice works
        b.close()
    finally:
        # Release the OS handle even when an assertion above fails before
        # the buffer gets closed; closing an already closed file is a no-op.
        real_file.close()
Beispiel #9
0
def test_closed(blocksize):
    """After close(), the buffer and its raw stream are closed and unusable."""
    inner = io.BytesIO()
    buf = BlockBuffer(inner, blocksize)
    buf.close()

    assert buf.closed
    assert inner.closed

    # Each operation is deferred in a lambda so that it runs inside the
    # ``pytest.raises`` context, in the same order as listed here.
    operations_on_closed_buffer = (
        lambda: buf.size,
        lambda: buf.seekable(),
        lambda: buf.readable(),
        lambda: buf.tell(),
        lambda: buf.seek(0),
        lambda: buf.read(),
    )
    for operation in operations_on_closed_buffer:
        with pytest.raises(ValueError, match="I/O operation on closed file."):
            operation()

    # closing twice works
    buf.close()
Beispiel #10
0
def test_caching_remainder(raw):
    """Reading to the end from inside the last block fetches only that block.

    The expected record ``(4, 2)`` has a length shorter than the block size
    of 4 -- presumably because the ``raw`` fixture holds 6 bytes; verify
    against the fixture definition.
    """
    buf = BlockBuffer(raw, 4)

    buf.seek(5)
    buf.read()

    expected_reads = [(4, 2)]
    assert raw.records == expected_reads
Beispiel #11
0
def test_init_fails_closed(blocksize):
    """Constructing a BlockBuffer over an already closed stream raises ValueError."""
    closed_stream = io.BytesIO()
    closed_stream.close()

    expected_error = pytest.raises(ValueError, match="Cannot use closed file object")
    with expected_error:
        BlockBuffer(closed_stream, blocksize)
Beispiel #12
0
def test_init_fails_blocksize(blocksize):
    """BlockBuffer raises ValueError for the block size this test is given.

    NOTE(review): ``blocksize`` is presumably parametrized with values
    below 1 for this test -- confirm against the fixture/parametrization.
    """
    stream = io.BytesIO()

    expected_error = pytest.raises(ValueError, match="blocksize must be at least 1")
    with expected_error:
        BlockBuffer(stream, blocksize)